Skip to content

Commit

Permalink
Fix SR anomaly score calculation at beginning (#5502)
Browse files Browse the repository at this point in the history
* adjust expected value

* update boundary calculation

* fix boundary

* adjust default values

* fix percent case

* fix error in anomaly score calculation

* adjust score calculation for first & second points

* fix SR not reporting an anomaly at the beginning

* fix an issue in batch process

* remove an unused parameter

Co-authored-by: [email protected] <[email protected]>
  • Loading branch information
guinao and [email protected] authored Dec 2, 2020
1 parent 652abaa commit d257b88
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 3 deletions.
29 changes: 26 additions & 3 deletions src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,15 @@ public void Process()
_previousBatch = _previousBatch.GetRange(_batch.Count, _bLen);
_previousBatch.AddRange(_batch);
_modeler.Train(_previousBatch.ToArray(), ref _results);

// move the values to front
for (int i = 0; i < _batch.Count; ++i)
{
for (int j = 0; j < _outputLength; ++j)
{
_results[i][j] = _results[_bLen + i][j];
}
}
}
else
{
Expand All @@ -334,7 +343,7 @@ public ValueGetter<VBuffer<double>> CreateGetter(DataViewRowCursor input, string
double src = default;
srcGetter(ref src);
var result = VBufferEditor.Create(ref dst, _outputLength);
_results[input.Position % _batchSize + _bLen].CopyTo(result.Values);
_results[input.Position % _batchSize].CopyTo(result.Values);
dst = result.Commit();
};
return getter;
Expand All @@ -351,6 +360,15 @@ internal sealed class SrCnnEntireModeler
private static readonly double _deanomalyThreshold = 0.35;
private static readonly double _boundSensitivity = 93.0;
private static readonly double _unitForZero = 0.3;
private static readonly double _minimumScore = 0.0;
private static readonly double _maximumScore = 1.0;
// If the score window is smaller than this value, the anomaly score tends to be small.
// Proof: For each point, the SR anomaly score is calculated as (w is average window size):
// (mag - avg_mag) / avg_mag
// = max (w * mag_{a} - sum_{i=0 to w-1} mag_{a - i}) / sum_{i=0 to w-1} mag_{a - i}
// = max ((w - 1) * mag_{a} + C) / (mag_{a} + C)
// <= w - 1
private static readonly int _minimumScoreWindowSize = (int)(_maximumScore * 10) + 1;

// pseudo-code to generate the factors.
// factors = []
Expand Down Expand Up @@ -577,15 +595,20 @@ private void SpectralResidual(double[] values, double[][] results, double thresh
{
_ifftMagList[i] = Math.Sqrt(_ifftRe[i] * _ifftRe[i] + _ifftIm[i] * _ifftIm[i]);
}

AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize));
for (int i = 0; i <= Math.Min(length, _minimumScoreWindowSize); ++i)
{
_cumSumList[i] = _cumSumList[Math.Min(length, _minimumScoreWindowSize) - 1];
}

// Step 7: Calculate raw score and set result
for (int i = 0; i < results.GetLength(0); ++i)
{
var score = CalculateScore(_ifftMagList[i], _cumSumList[i]);
score /= 10.0f;
score = Math.Min(score, 1);
score = Math.Max(score, 0);
score = Math.Min(score, _maximumScore);
score = Math.Max(score, _minimumScore);

var detres = score > threshold ? 1 : 0;

Expand Down
58 changes: 58 additions & 0 deletions test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,64 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
}
}

// Runs batch SR-CNN detection over "anomaly_at_beginning.csv" once per
// deseasonality mode. Exactly one row (index 1) is expected to be flagged and
// to fall outside the reported margin; every other row must be unflagged and
// lie within the margin.
[Theory, CombinatorialData]
public void TestSrCnnAnomalyDetectorWithAnomalyAtBeginning(
    [CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode
)
{
    const int expectedAnomalyRow = 1;

    var mlContext = new MLContext(1);

    // Load the series as a data view for the detector, and also materialize it
    // so each prediction can be compared against its source value.
    var csvPath = GetDataPath("Timeseries", "anomaly_at_beginning.csv");
    IDataView input = mlContext.Data.LoadFromTextFile<TimeSeriesDataDouble>(csvPath, hasHeader: true);
    List<TimeSeriesDataDouble> rows = mlContext.Data
        .CreateEnumerable<TimeSeriesDataDouble>(input, reuseRowObject: false)
        .ToList();

    // Detector configuration; BatchSize = -1 and Period = 0 are passed through as-is.
    var detectorOptions = new SrCnnEntireAnomalyDetectorOptions
    {
        Threshold = 0.30,
        BatchSize = -1,
        Sensitivity = 80.0,
        DetectMode = SrCnnDetectMode.AnomalyAndMargin,
        Period = 0,
        DeseasonalityMode = mode
    };

    // Score the whole series; the output column is named after the Prediction
    // member and the input column after the Value member.
    var scored = mlContext.AnomalyDetection.DetectEntireAnomalyBySrCnn(
        input,
        nameof(SrCnnAnomalyDetection.Prediction),
        nameof(TimeSeriesDataDouble.Value),
        detectorOptions);

    var predictions = mlContext.Data.CreateEnumerable<SrCnnAnomalyDetection>(
        scored, reuseRowObject: false);

    int row = 0;
    foreach (var item in predictions)
    {
        var output = item.Prediction;
        Assert.Equal(7, output.Length);

        if (row != expectedAnomalyRow)
        {
            // Ordinary point: not flagged, and output[6] <= value <= output[5].
            Assert.Equal(0, output[0]);
            var actual = rows[row].Value;
            Assert.True(output[6] <= actual);
            Assert.True(actual <= output[5]);
        }
        else
        {
            // Anomalous point: flagged, and the value lies outside [output[6], output[5]].
            Assert.Equal(1, output[0]);
            var actual = rows[row].Value;
            Assert.True(output[6] > actual || actual > output[5]);
        }

        row++;
    }
}

[Theory, CombinatorialData]
public void TestSrcnnEntireDetectNonnegativeData(
[CombinatorialValues(true, false)] bool isPositive)
Expand Down
39 changes: 39 additions & 0 deletions test/data/Timeseries/anomaly_at_beginning.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
Value
181.944
37.176
57.14
67.128
72.12
77.112
82.104
83.1
87.09
92.088
92.01
97.08
102.072
107.05
107.06
117.048
122.04
132.024
147
151.82
151.992
151.72
151.94
156.969
156.984
156.92
161.976
161.94
161.97
166.968
176.952
181.94
186.936
201.91
201.912
201.9
206.904
216.88

0 comments on commit d257b88

Please sign in to comment.