diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 7cca7f893a..ddcbfd79d3 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -84,7 +84,7 @@ internal static class Defaults { public const double Threshold = 0.3; public const int BatchSize = 2000; - public const double Sensitivity = 55; + public const double Sensitivity = 70; public const SrCnnDetectMode DetectMode = SrCnnDetectMode.AnomalyOnly; public const int Period = 0; public const SrCnnDeseasonalityMode DeseasonalityMode = SrCnnDeseasonalityMode.Stl; @@ -349,36 +349,55 @@ internal sealed class SrCnnEntireModeler private static readonly int _judgementWindowSize = 40; private static readonly double _eps = 1e-8; private static readonly double _deanomalyThreshold = 0.35; - private static readonly double _boundSensitivity = 70.0; - - // A fixed lookup table which returns factor using sensitivity as index. - // Since Margin = BoundaryUnit * factor, this factor is calculated to make sure Margin == Boundary when sensitivity is 50, - // and increases/decreases exponentially as sensitivity increases/decreases. 
- // The factor array is generated by formula: - // f(x)=1, if x=50; - // f(x)=f(x+1)*(1.25+0.001*x), if 0<=x<50; - // f(x)=f(x+1)/(1.25+0.001*(x-50)), if 50(ref results, values.Length); } + _minimumOriginValue = Double.MaxValue; + _maximumOriginValue = Double.MinValue; + Array.Resize(ref _seriesToDetect, values.Length); for (int i = 0; i < values.Length; ++i) { _seriesToDetect[i] = values[i]; + _minimumOriginValue = Math.Min(_minimumOriginValue, values[i]); + _maximumOriginValue = Math.Max(_maximumOriginValue, values[i]); } if (_period > 0) @@ -641,7 +667,7 @@ private void GetExpectedValue(double[] values, double[][] results) for (int i = 0; i < results.Length; ++i) { - results[i][3] = _ifftRe[i]; + results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(_ifftRe[i]); } } @@ -650,7 +676,7 @@ private void GetExpectedValuePeriod(double[] values, double[][] results, IReadOn //Step 8: Calculate Expected Value for (int i = 0; i < values.Length; ++i) { - results[i][3] = values[i] - residual[i]; + results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(values[i] - residual[i]); } } @@ -762,7 +788,8 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) { //Step 10: Calculate UpperBound and LowerBound var margin = CalculateMargin(_units[i], sensitivity); - results[i][3] = _ifftRe[i]; + results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(_ifftRe[i]); + results[i][4] = _units[i]; results[i][5] = _ifftRe[i] + margin; results[i][6] = _ifftRe[i] - margin; @@ -783,6 +810,21 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) } } + // Clamp the expected value to 0 when its sign contradicts the original data range (all values non-negative or all non-positive) + private double AdjustExpectedValueBasedOnOriginalDataRange(double expectedValue) + { + if (_minimumOriginValue >= 0 && expectedValue < 0) + { + expectedValue = 0; + } + else if (_maximumOriginValue <= 0 && expectedValue > 0) + { + expectedValue = 0; + } + + return expectedValue; + } + // Adjust 
the expected value so that it is within the bound margin of value private double AdjustExpectedValueBasedOnBound(double value, double expectedValue, double unit) { @@ -880,18 +922,20 @@ private void CalculateExpectedValueByFft(double[] data) FftUtils.ComputeBackwardFft(_fftRe, _fftIm, _ifftRe, _ifftIm, length); } - private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys) + private void CalculateBoundaryUnit(double[] data, bool[] isAnomalies) { int window = Math.Min(data.Length / 3, 512); double trendFraction = 0.5; // mix trend and average of trend double trendSum = 0; int calculationSize = 0; + bool closeToZero = true; MedianFilter(data, window, true); for (int i = 0; i < _trends.Length; ++i) { - if (!isAnomalys[i]) + if (!isAnomalies[i]) { + closeToZero = closeToZero && Math.Abs(_trends[i]) < _eps; /* compare the magnitude so that a negative trend is not treated as close to zero */ trendSum += Math.Abs(_trends[i]); ++calculationSize; } @@ -910,10 +954,17 @@ private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys) Array.Resize(ref _units, _trends.Length); for (int i = 0; i < _units.Length; ++i) { - _units[i] = Math.Max(1, averageTrendPart + Math.Abs(_trends[i]) * trendFraction); - if (double.IsInfinity(_units[i])) + if (closeToZero) + { + _units[i] = _unitForZero; + } + else { - throw new ArithmeticException("Not finite unit value"); + _units[i] = averageTrendPart + Math.Abs(_trends[i]) * trendFraction; + if (double.IsInfinity(_units[i])) + { + throw new ArithmeticException("Not finite unit value"); + } } } } @@ -1031,19 +1082,14 @@ private double CalculateAnomalyScore(double value, double exp, double unit, bool return anomalyScore; } - double distance = Math.Abs(exp - value); - List<double> margins = new List<double>(); - for (int i = 100; i >= 0; --i) - { - margins.Add(CalculateMargin(unit, i)); - } + double distanceFactor = Math.Abs(exp - value) / unit; int lb = 0; int ub = 100; while (lb < ub) { int mid = (lb + ub) / 2; - if (margins[mid] < distance) + if (_factors[100 - mid] < distanceFactor) { lb = mid + 1; } @@ -1053,15 +1099,15 @@ private 
double CalculateAnomalyScore(double value, double exp, double unit, bool } } - if (Math.Abs(margins[lb] - distance) < _eps || lb == 0) + if (_factors[100 - lb] == distanceFactor || lb == 0) { anomalyScore = lb; } else { - double lowerMargin = margins[lb - 1]; - double upperMargin = margins[lb]; - anomalyScore = lb - 1 + (distance - lowerMargin) / (upperMargin - lowerMargin); + double lowerMargin = _factors[101 - lb]; + double upperMargin = _factors[100 - lb]; + anomalyScore = lb - 1 + (distanceFactor - lowerMargin) / (upperMargin - lowerMargin); } return anomalyScore / 100.0f; diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 1ffdf2ba96..3c81426021 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -621,7 +621,7 @@ public void TestSrCnnBatchAnomalyDetector( // Do batch anomaly detection var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, - threshold: 0.35, batchSize: batchSize, sensitivity: 90.0, mode); + threshold: 0.35, batchSize: batchSize, sensitivity: 98.0, mode); // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection. 
var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>( @@ -694,7 +694,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalData( { Threshold = 0.3, BatchSize = -1, - Sensitivity = 53.0, + Sensitivity = 64.0, DetectMode = SrCnnDetectMode.AnomalyAndMargin, Period = 288, DeseasonalityMode = mode @@ -741,7 +741,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData( { Threshold = 0.23, BatchSize = -1, - Sensitivity = 53.0, + Sensitivity = 63.0, DetectMode = SrCnnDetectMode.AnomalyAndMargin, Period = 288, DeseasonalityMode = mode @@ -776,6 +776,68 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData( } } + [Theory, CombinatorialData] + public void TestSrcnnEntireDetectNonnegativeData( + [CombinatorialValues(true, false)] bool isPositive) + { + var ml = new MLContext(1); + IDataView dataView; + List<TimeSeriesDataDouble> data; + + // Resolve the path of the test data file + var dataPath = GetDataPath("Timeseries", "non_negative_case.csv"); + + // Load data from file into the dataView + dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true); + data = ml.Data.CreateEnumerable<TimeSeriesDataDouble>(dataView, reuseRowObject: false).ToList(); + + if (!isPositive) + { + for (int i = 0; i < data.Count; ++i) + { + data[i].Value = -data[i].Value; + } + } + + dataView = ml.Data.LoadFromEnumerable(data); + + // Setup the detection arguments + string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); + string inputColumnName = nameof(TimeSeriesDataDouble.Value); + + // Do batch anomaly detection + var options = new SrCnnEntireAnomalyDetectorOptions() + { + Threshold = 0.10, + BatchSize = -1, + Sensitivity = 99.0, + DetectMode = SrCnnDetectMode.AnomalyAndMargin, + Period = 0, + DeseasonalityMode = SrCnnDeseasonalityMode.Stl + }; + + var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options); + + // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection. 
+ var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>( + outputDataView, reuseRowObject: false); + + if (isPositive) + { + foreach (var prediction in predictionColumn) + { + Assert.True(prediction.Prediction[3] >= 0); + } + } + else + { + foreach (var prediction in predictionColumn) + { + Assert.True(prediction.Prediction[3] <= 0); + } + } + } + [Fact] public void RootCauseLocalization() { diff --git a/test/data/Timeseries/non_negative_case.csv b/test/data/Timeseries/non_negative_case.csv new file mode 100644 index 0000000000..2dbf4b3fe5 --- /dev/null +++ b/test/data/Timeseries/non_negative_case.csv @@ -0,0 +1,70 @@ +Value +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +5 +1 +4 +1 +4 +3 +3 +6 +3 +6 +4 +7 +9 +11 +28 +16 +35 +59 +42 +77 +91 +132 +189 +264 +258 +359 +493 +352 +550