diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs index 0e14dd6f6c..833fdceda7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs @@ -1,29 +1,16 @@ using System; using System.Collections.Generic; +using System.IO; using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.Transforms.TimeSeries; namespace Samples.Dynamic { public static class DetectChangePointBySsa { - class ChangePointPrediction - { - [VectorType(4)] - public double[] Prediction { get; set; } - } - - class SsaChangePointData - { - public float Value; - - public SsaChangePointData(float value) - { - Value = value; - } - } - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // It demonstrates a stateful prediction engine that updates the state of the model and allows for saving/reloading. // The estimator is applied then to identify points where data distribution changed. // This estimator can account for temporal seasonality in the data. public static void Example() @@ -32,60 +19,119 @@ public static void Example() // as well as the source of randomness. 
var ml = new MLContext(); - // Generate sample series data with a recurring pattern and then a change in trend + // Generate sample series data with a recurring pattern const int SeasonalitySize = 5; const int TrainingSeasons = 3; const int TrainingSize = SeasonalitySize * TrainingSeasons; - var data = new List<SsaChangePointData>(); - for (int i = 0; i < TrainingSeasons; i++) - for (int j = 0; j < SeasonalitySize; j++) - data.Add(new SsaChangePointData(j)); - // This is a change point - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new SsaChangePointData(i * 100)); + var data = new List<TimeSeriesData>() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + }; // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup estimator arguments - var inputColumnName = nameof(SsaChangePointData.Value); + // Setup SsaChangePointDetector arguments + var inputColumnName = nameof(TimeSeriesData.Value); var outputColumnName = nameof(ChangePointPrediction.Prediction); - // The transformed data. - var transformedData = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + // Train the change point detector. + ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView); - // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. - var predictionColumn = ml.Data.CreateEnumerable<ChangePointPrediction>(transformedData, reuseRowObject: false); + // Create a prediction engine from the model for feeding new data. 
+ var engine = model.CreateTimeSeriesPredictionFunction<TimeSeriesData, ChangePointPrediction>(ml); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + // Start streaming new data points with no change point to the prediction engine. + Console.WriteLine($"Output from ChangePoint predictions on new data:"); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); - int k = 0; - foreach (var prediction in predictionColumn) - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", data[k++].Value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); - Console.WriteLine(""); - - // Prediction column obtained post-transformation. + + // Output from ChangePoint predictions on new data: // Data Alert Score P-Value Martingale value - // 0 0 - 2.53 0.50 0.00 - // 1 0 - 0.01 0.01 0.00 - // 2 0 0.76 0.14 0.00 - // 3 0 0.69 0.28 0.00 - // 4 0 1.44 0.18 0.00 - // 0 0 - 1.84 0.17 0.00 - // 1 0 0.22 0.44 0.00 - // 2 0 0.20 0.45 0.00 - // 3 0 0.16 0.47 0.00 - // 4 0 1.33 0.18 0.00 - // 0 0 - 1.79 0.07 0.00 - // 1 0 0.16 0.50 0.00 - // 2 0 0.09 0.50 0.00 - // 3 0 0.08 0.45 0.00 - // 4 0 1.31 0.12 0.00 - // 0 0 - 1.79 0.07 0.00 - // 100 1 99.16 0.00 4031.94 <-- alert is on, predicted changepoint - // 200 0 185.23 0.00 731260.87 - // 300 0 270.40 0.01 3578470.47 - // 400 0 357.11 0.03 45298370.86 + + for (int i = 0; i < 5; i++) + PrintPrediction(i, engine.Predict(new TimeSeriesData(i))); + + // 0 0 -1.01 0.50 0.00 + // 1 0 -0.24 0.22 0.00 + // 2 0 -0.31 0.30 0.00 + // 3 0 0.44 0.01 0.00 + // 4 0 2.16 0.00 0.24 + + // Now stream data points that reflect a change in trend. 
+ for (int i = 0; i < 5; i++) + { + int value = (i + 1) * 100; + PrintPrediction(value, engine.Predict(new TimeSeriesData(value))); + } + // 100 0 86.23 0.00 2076098.24 + // 200 0 171.38 0.00 809668524.21 + // 300 1 256.83 0.01 22130423541.93 <-- alert is on, note that delay is expected + // 400 0 326.55 0.04 241162710263.29 + // 500 0 364.82 0.08 597660527041.45 <-- saved to disk + + // Now we demonstrate saving and loading the model. + + // Save the model that exists within the prediction engine. + // The engine has been updating this model with every new data point. + var modelPath = "model.zip"; + engine.CheckPoint(ml, modelPath); + + // Load the model. + using (var file = File.OpenRead(modelPath)) + model = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + engine = model.CreateTimeSeriesPredictionFunction<TimeSeriesData, ChangePointPrediction>(ml); + + // Run predictions on the loaded model. + for (int i = 0; i < 5; i++) + { + int value = (i + 1) * 100; + PrintPrediction(value, engine.Predict(new TimeSeriesData(value))); + } + + // 100 0 -58.58 0.15 1096021098844.34 <-- loaded from disk and running new predictions + // 200 0 -41.24 0.20 97579154688.98 + // 300 0 -30.61 0.24 95319753.87 + // 400 0 58.87 0.38 14.24 + // 500 0 219.28 0.36 0.05 + + } + + private static void PrintPrediction(float value, ChangePointPrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + + class ChangePointPrediction + { + [VectorType(4)] + public double[] Prediction { get; set; } + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs new file mode 100644 index 0000000000..2e0df19007 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs @@ -0,0 +1,114 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class DetectChangePointBySsaBatchPrediction + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // The estimator is applied then to identify points where data distribution changed. + // This estimator can account for temporal seasonality in the data. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with a recurring pattern and then a change in trend + const int SeasonalitySize = 5; + const int TrainingSeasons = 3; + const int TrainingSize = SeasonalitySize * TrainingSeasons; + var data = new List<TimeSeriesData>() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + //This is a change point + new TimeSeriesData(0), + new TimeSeriesData(100), + new TimeSeriesData(200), + new TimeSeriesData(300), + new TimeSeriesData(400), + }; + + // Convert data to IDataView. 
+ var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup estimator arguments + var inputColumnName = nameof(TimeSeriesData.Value); + var outputColumnName = nameof(ChangePointPrediction.Prediction); + + // The transformed data. + var transformedData = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + + // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. + var predictionColumn = ml.Data.CreateEnumerable<ChangePointPrediction>(transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); + int k = 0; + foreach (var prediction in predictionColumn) + PrintPrediction(data[k++].Value, prediction); + + // Prediction column obtained post-transformation. + // Data Alert Score P-Value Martingale value + // 0 0 -2.53 0.50 0.00 + // 1 0 -0.01 0.01 0.00 + // 2 0 0.76 0.14 0.00 + // 3 0 0.69 0.28 0.00 + // 4 0 1.44 0.18 0.00 + // 0 0 -1.84 0.17 0.00 + // 1 0 0.22 0.44 0.00 + // 2 0 0.20 0.45 0.00 + // 3 0 0.16 0.47 0.00 + // 4 0 1.33 0.18 0.00 + // 0 0 -1.79 0.07 0.00 + // 1 0 0.16 0.50 0.00 + // 2 0 0.09 0.50 0.00 + // 3 0 0.08 0.45 0.00 + // 4 0 1.31 0.12 0.00 + // 0 0 -1.79 0.07 0.00 + // 100 1 99.16 0.00 4031.94 <-- alert is on, predicted changepoint + // 200 0 185.23 0.00 731260.87 + // 300 0 270.40 0.01 3578470.47 + // 400 0 357.11 0.03 45298370.86 + } + + private static void PrintPrediction(float value, ChangePointPrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + + class ChangePointPrediction + { + [VectorType(4)] + public double[] Prediction { get; set; } + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; 
+ } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs index 268c28238a..2890c4ec57 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs @@ -4,29 +4,15 @@ using System; using System.Collections.Generic; +using System.IO; using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.Transforms.TimeSeries; namespace Samples.Dynamic { public static class DetectIidChangePoint { - class ChangePointPrediction - { - [VectorType(4)] - public double[] Prediction { get; set; } - } - - class IidChangePointData - { - public float Value; - - public IidChangePointData(float value) - { - Value = value; - } - } - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). // The estimator is applied then to identify points where data distribution changed. public static void Example() @@ -37,35 +23,54 @@ public static void Example() // Generate sample series data with a change const int Size = 16; - var data = new List<IidChangePointData>(Size); - for (int i = 0; i < Size / 2; i++) - data.Add(new IidChangePointData(5)); - // This is a change point - for (int i = 0; i < Size / 2; i++) - data.Add(new IidChangePointData(7)); + var data = new List<TimeSeriesData>(Size) + { + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + + //Change point data. + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + }; // Convert data to IDataView. 
var dataView = ml.Data.LoadFromEnumerable(data); - // Setup estimator arguments + // Setup IidChangePointDetector arguments string outputColumnName = nameof(ChangePointPrediction.Prediction); - string inputColumnName = nameof(IidChangePointData.Value); + string inputColumnName = nameof(TimeSeriesData.Value); - // The transformed data. - var transformedData = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + // Time Series model. + ITransformer model = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView); - // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. - var predictionColumn = ml.Data.CreateEnumerable<ChangePointPrediction>(transformedData, reuseRowObject: false); + // Create a time series prediction engine from the model. + var engine = model.CreateTimeSeriesPredictionFunction<TimeSeriesData, ChangePointPrediction>(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); - int k = 0; - foreach (var prediction in predictionColumn) - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", data[k++].Value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); - Console.WriteLine(""); - - // Prediction column obtained post-transformation. + // Data Alert Score P-Value Martingale value + + // Create non-anomalous data and check for change point. + for (int index = 0; index < 8; index++) + { + // Anomaly change point detection. + PrintPrediction(5, engine.Predict(new TimeSeriesData(5))); + } + + // 5 0 5.00 0.50 0.00 <-- Time Series 1. 
// 5 0 5.00 0.50 0.00 // 5 0 5.00 0.50 0.00 // 5 0 5.00 0.50 0.00 @@ -73,9 +78,50 @@ public static void Example() // 5 0 5.00 0.50 0.00 // 5 0 5.00 0.50 0.00 // 5 0 5.00 0.50 0.00 - // 5 0 5.00 0.50 0.00 - // 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint - // 7 0 7.00 0.13 33950.16 + + // Change point + PrintPrediction(7, engine.Predict(new TimeSeriesData(7))); + + // 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint (and model is checkpointed). + + // Checkpoint the model. + var modelPath = "temp.zip"; + engine.CheckPoint(ml, modelPath); + + // Reference to current time series engine because in the next step "engine" will point to the + // checkpointed model being loaded from disk. + var timeseries1 = engine; + + // Load the model. + using (var file = File.OpenRead(modelPath)) + model = ml.Model.Load(file, out DataViewSchema schema); + + // Create a time series prediction engine from the checkpointed model. + engine = model.CreateTimeSeriesPredictionFunction<TimeSeriesData, ChangePointPrediction>(ml); + for (int index = 0; index < 8; index++) + { + // Anomaly change point detection. + PrintPrediction(7, engine.Predict(new TimeSeriesData(7))); + } + + // 7 0 7.00 0.13 33950.16 <-- Time Series 2 : Model loaded back from disk and prediction is made. + // 7 0 7.00 0.26 60866.34 + // 7 0 7.00 0.38 78362.04 + // 7 0 7.00 0.50 0.01 + // 7 0 7.00 0.50 0.00 + // 7 0 7.00 0.50 0.00 + // 7 0 7.00 0.50 0.00 + + // Prediction from the original time series engine should match the prediction from + // check pointed model. + engine = timeseries1; + for (int index = 0; index < 8; index++) + { + // Anomaly change point detection. + PrintPrediction(7, engine.Predict(new TimeSeriesData(7))); + } + + // 7 0 7.00 0.13 33950.16 <-- Time Series 1 and prediction is made. 
// 7 0 7.00 0.26 60866.34 // 7 0 7.00 0.38 78362.04 // 7 0 7.00 0.50 0.01 @@ -83,5 +129,25 @@ public static void Example() // 7 0 7.00 0.50 0.00 // 7 0 7.00 0.50 0.00 } + + private static void PrintPrediction(float value, ChangePointPrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + + class ChangePointPrediction + { + [VectorType(4)] + public double[] Prediction { get; set; } + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs new file mode 100644 index 0000000000..760305df33 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class DetectIidChangePointBatchPrediction + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // The estimator is applied then to identify points where data distribution changed. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. 
+ var ml = new MLContext(); + + // Generate sample series data with a change + const int Size = 16; + var data = new List<TimeSeriesData>(Size) + { + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + + //Change point data. + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + }; + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup estimator arguments + string outputColumnName = nameof(ChangePointPrediction.Prediction); + string inputColumnName = nameof(TimeSeriesData.Value); + + // The transformed data. + var transformedData = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + + // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. + var predictionColumn = ml.Data.CreateEnumerable<ChangePointPrediction>(transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); + int k = 0; + foreach (var prediction in predictionColumn) + PrintPrediction(data[k++].Value, prediction); + + // Prediction column obtained post-transformation. 
+ // Data Alert Score P-Value Martingale value + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint + // 7 0 7.00 0.13 33950.16 + // 7 0 7.00 0.26 60866.34 + // 7 0 7.00 0.38 78362.04 + // 7 0 7.00 0.50 0.01 + // 7 0 7.00 0.50 0.00 + // 7 0 7.00 0.50 0.00 + // 7 0 7.00 0.50 0.00 + } + + private static void PrintPrediction(float value, ChangePointPrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + + class ChangePointPrediction + { + [VectorType(4)] + public double[] Prediction { get; set; } + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs index 862c390ac7..439006a7ec 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs @@ -9,22 +9,6 @@ namespace Samples.Dynamic { public static class DetectIidSpike { - class IidSpikeData - { - public float Value; - - public IidSpikeData(float value) - { - Value = value; - } - } - - class IidSpikePrediction - { - [VectorType(3)] - public double[] Prediction { get; set; } - } - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). // The estimator is applied then to identify spiking points in the series. 
public static void Example() @@ -35,46 +19,104 @@ public static void Example() // Generate sample series data with a spike const int Size = 10; - var data = new List<IidSpikeData>(Size); - for (int i = 0; i < Size / 2; i++) - data.Add(new IidSpikeData(5)); - // This is a spike - data.Add(new IidSpikeData(10)); - for (int i = 0; i < Size / 2; i++) - data.Add(new IidSpikeData(5)); + var data = new List<TimeSeriesData>(Size + 1) + { + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + + // This is a spike. + new TimeSeriesData(10), + + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + }; // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup the estimator arguments + // Setup IidSpikeDetector arguments string outputColumnName = nameof(IidSpikePrediction.Prediction); - string inputColumnName = nameof(IidSpikeData.Value); + string inputColumnName = nameof(TimeSeriesData.Value); - // The transformed data. - var transformedData = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + // The transformed model. + ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size).Fit(dataView); - // Getting the data of the newly created column as an IEnumerable of IidSpikePrediction. - var predictionColumn = ml.Data.CreateEnumerable<IidSpikePrediction>(transformedData, reuseRowObject: false); + // Create a time series prediction engine from the model. 
+ var engine = model.CreateTimeSeriesPredictionFunction<TimeSeriesData, IidSpikePrediction>(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); - Console.WriteLine("Alert\tScore\tP-Value"); - foreach (var prediction in predictionColumn) - Console.WriteLine("{0}\t{1:0.00}\t{2:0.00}", prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); - Console.WriteLine(""); - + Console.WriteLine("Data\tAlert\tScore\tP-Value"); + // Prediction column obtained post-transformation. - // Alert Score P-Value - // 0 5.00 0.50 - // 0 5.00 0.50 - // 0 5.00 0.50 - // 0 5.00 0.50 - // 0 5.00 0.50 - // 1 10.00 0.00 <-- alert is on, predicted spike - // 0 5.00 0.26 - // 0 5.00 0.26 - // 0 5.00 0.50 - // 0 5.00 0.50 - // 0 5.00 0.50 + // Data Alert Score P-Value + + // Create non-anomalous data and check for anomaly. + for (int index = 0; index < 5; index++) + { + // Anomaly spike detection. + PrintPrediction(5, engine.Predict(new TimeSeriesData(5))); + } + + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + + // Spike. + PrintPrediction(10, engine.Predict(new TimeSeriesData(10))); + + // 10 1 10.00 0.00 <-- alert is on, predicted spike (check-point model) + + // Checkpoint the model. + var modelPath = "temp.zip"; + engine.CheckPoint(ml, modelPath); + + // Load the model. + using (var file = File.OpenRead(modelPath)) + model = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + engine = model.CreateTimeSeriesPredictionFunction<TimeSeriesData, IidSpikePrediction>(ml); + + for (int index = 0; index < 5; index++) + { + // Anomaly spike detection. + PrintPrediction(5, engine.Predict(new TimeSeriesData(5))); + } + + // 5 0 5.00 0.26 <-- load model from disk. 
+ // 5 0 5.00 0.26 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + + } + + private static void PrintPrediction(float value, IidSpikePrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2]); + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + + class IidSpikePrediction + { + [VectorType(3)] + public double[] Prediction { get; set; } } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs new file mode 100644 index 0000000000..4145214918 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs @@ -0,0 +1,93 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class DetectIidSpikeBatchPrediction + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // The estimator is applied then to identify spiking points in the series. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with a spike + const int Size = 10; + var data = new List<TimeSeriesData>(Size + 1) + { + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + + // This is a spike. + new TimeSeriesData(10), + + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + }; + + // Convert data to IDataView. 
+ var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup the estimator arguments + string outputColumnName = nameof(IidSpikePrediction.Prediction); + string inputColumnName = nameof(TimeSeriesData.Value); + + // The transformed data. + var transformedData = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + + // Getting the data of the newly created column as an IEnumerable of IidSpikePrediction. + var predictionColumn = ml.Data.CreateEnumerable<IidSpikePrediction>(transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine("Data\tAlert\tScore\tP-Value"); + + int k = 0; + foreach (var prediction in predictionColumn) + PrintPrediction(data[k++].Value, prediction); + + // Prediction column obtained post-transformation. + // Data Alert Score P-Value + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 10 1 10.00 0.00 <-- alert is on, predicted spike + // 5 0 5.00 0.26 + // 5 0 5.00 0.26 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + } + + private static void PrintPrediction(float value, IidSpikePrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2]); + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + + class IidSpikePrediction + { + [VectorType(3)] + public double[] Prediction { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs index c7293052f2..38094bb672 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs @@ -1,28 
+1,14 @@ using System; using System.Collections.Generic; +using System.IO; using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.Transforms.TimeSeries; namespace Samples.Dynamic { public static class DetectSpikeBySsa { - class SsaSpikeData - { - public float Value; - - public SsaSpikeData(float value) - { - Value = value; - } - } - - class SsaSpikePrediction - { - [VectorType(3)] - public double[] Prediction { get; set; } - } - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). // The estimator is applied then to identify spiking points in the series. // This estimator can account for temporal seasonality in the data. @@ -32,62 +18,113 @@ public static void Example() // as well as the source of randomness. var ml = new MLContext(); - // Generate sample series data with a recurring pattern and a spike within the pattern + // Generate sample series data with a recurring pattern const int SeasonalitySize = 5; const int TrainingSeasons = 3; const int TrainingSize = SeasonalitySize * TrainingSeasons; - var data = new List(); - for (int i = 0; i < TrainingSeasons; i++) - for (int j = 0; j < SeasonalitySize; j++) - data.Add(new SsaSpikeData(j)); - // This is a spike - data.Add(new SsaSpikeData(100)); - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new SsaSpikeData(i)); + var data = new List() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + }; // Convert data to IDataView. 
var dataView = ml.Data.LoadFromEnumerable(data); - // Setup estimator arguments - var inputColumnName = nameof(SsaSpikeData.Value); + // Setup SsaSpikeDetector arguments + var inputColumnName = nameof(TimeSeriesData.Value); var outputColumnName = nameof(SsaSpikePrediction.Prediction); - // The transformed data. - var transformedData = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + // Train the spike detector. + ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView); - // Getting the data of the newly created column as an IEnumerable of SsaSpikePrediction. - var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Create a prediction engine from the model for feeding new data. + var engine = model.CreateTimeSeriesPredictionFunction(ml); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + // Start streaming new data points with no spike to the prediction engine. + Console.WriteLine($"Output from spike predictions on new data:"); Console.WriteLine("Data\tAlert\tScore\tP-Value"); - int k = 0; - foreach (var prediction in predictionColumn) - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", data[k++].Value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); - Console.WriteLine(""); - // Prediction column obtained post-transformation. 
+ // Output from spike predictions on new data: // Data Alert Score P-Value - // 0 0 - 2.53 0.50 - // 1 0 - 0.01 0.01 - // 2 0 0.76 0.14 - // 3 0 0.69 0.28 - // 4 0 1.44 0.18 - // 0 0 - 1.84 0.17 - // 1 0 0.22 0.44 - // 2 0 0.20 0.45 - // 3 0 0.16 0.47 - // 4 0 1.33 0.18 - // 0 0 - 1.79 0.07 - // 1 0 0.16 0.50 - // 2 0 0.09 0.50 - // 3 0 0.08 0.45 - // 4 0 1.31 0.12 - // 100 1 98.21 0.00 <-- alert is on, predicted spike - // 0 0 - 13.83 0.29 - // 1 0 - 1.74 0.44 - // 2 0 - 0.47 0.46 - // 3 0 - 16.50 0.29 - // 4 0 - 29.82 0.21 + + for (int j = 0; j < 2; j++) + for (int i = 0; i < 5; i++) + PrintPrediction(i, engine.Predict(new TimeSeriesData(i))); + + // 0 0 -1.01 0.50 + // 1 0 -0.24 0.22 + // 2 0 -0.31 0.30 + // 3 0 0.44 0.01 + // 4 0 2.16 0.00 + // 0 0 -0.78 0.27 + // 1 0 -0.80 0.30 + // 2 0 -0.84 0.31 + // 3 0 0.33 0.31 + // 4 0 2.21 0.07 + + // Now send a data point that reflects a spike. + PrintPrediction(100, engine.Predict(new TimeSeriesData(100))); + + // 100 1 86.17 0.00 <-- alert is on, predicted spike + + // Now we demonstrate saving and loading the model. + // Save the model that exists within the prediction engine. + // The engine has been updating this model with every new data point. + var modelPath = "model.zip"; + engine.CheckPoint(ml, modelPath); + + // Load the model. + using (var file = File.OpenRead(modelPath)) + model = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + engine = model.CreateTimeSeriesPredictionFunction(ml); + + // Run predictions on the loaded model. 
+ for (int i = 0; i < 5; i++) + PrintPrediction(i, engine.Predict(new TimeSeriesData(i))); + + // 0 0 -2.74 0.40 <-- saved to disk, re-loaded, and running new predictions + // 1 0 -1.47 0.42 + // 2 0 -17.50 0.24 + // 3 0 -30.82 0.16 + // 4 0 -23.24 0.28 + } + + private static void PrintPrediction(float value, SsaSpikePrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2]); + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + + class SsaSpikePrediction + { + [VectorType(3)] + public double[] Prediction { get; set; } } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs new file mode 100644 index 0000000000..cc6a798dcf --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs @@ -0,0 +1,117 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class DetectSpikeBySsaBatchPrediction + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // The estimator is applied then to identify spiking points in the series. + // This estimator can account for temporal seasonality in the data. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. 
+ var ml = new MLContext(); + + // Generate sample series data with a recurring pattern and a spike within the pattern + const int SeasonalitySize = 5; + const int TrainingSeasons = 3; + const int TrainingSize = SeasonalitySize * TrainingSeasons; + var data = new List() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + // This is a spike. + new TimeSeriesData(100), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + }; + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup estimator arguments + var inputColumnName = nameof(TimeSeriesData.Value); + var outputColumnName = nameof(SsaSpikePrediction.Prediction); + + // The transformed data. + var transformedData = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + + // Getting the data of the newly created column as an IEnumerable of SsaSpikePrediction. + var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine("Data\tAlert\tScore\tP-Value"); + int k = 0; + foreach (var prediction in predictionColumn) + PrintPrediction(data[k++].Value, prediction); + + // Prediction column obtained post-transformation. 
+ // Data Alert Score P-Value + // 0 0 -2.53 0.50 + // 1 0 -0.01 0.01 + // 2 0 0.76 0.14 + // 3 0 0.69 0.28 + // 4 0 1.44 0.18 + // 0 0 -1.84 0.17 + // 1 0 0.22 0.44 + // 2 0 0.20 0.45 + // 3 0 0.16 0.47 + // 4 0 1.33 0.18 + // 0 0 -1.79 0.07 + // 1 0 0.16 0.50 + // 2 0 0.09 0.50 + // 3 0 0.08 0.45 + // 4 0 1.31 0.12 + // 100 1 98.21 0.00 <-- alert is on, predicted spike + // 0 0 -13.83 0.29 + // 1 0 -1.74 0.44 + // 2 0 -0.47 0.46 + // 3 0 -16.50 0.29 + // 4 0 -29.82 0.21 + } + + private static void PrintPrediction(float value, SsaSpikePrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2]); + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + + class SsaSpikePrediction + { + [VectorType(3)] + public double[] Prediction { get; set; } + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index aa64de5495..681d53fe40 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -25,7 +25,7 @@ public static class TimeSeriesCatalog /// /// /// /// /// @@ -47,7 +47,7 @@ public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalo /// /// /// /// /// @@ -73,7 +73,7 @@ public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, s /// /// /// /// /// @@ -110,7 +110,7 @@ public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCata /// /// /// /// /// diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs index 070e0c3d10..7978b7142f 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs @@ -65,6 +65,14 @@ public sealed class TimeSeriesPredictionFunction : PredictionEngineB /// /// Usually . 
/// Path to file on disk where the updated model needs to be saved. + /// + /// + /// + /// + /// public void CheckPoint(IHostEnvironment env, string modelPath) { using (var file = File.Create(modelPath)) @@ -261,8 +269,8 @@ public static class PredictionFunctionExtensions /// /// /// /// ///