From caf49cc6ae43a482978cc4e6cd67e222ca728afc Mon Sep 17 00:00:00 2001 From: Prathyusha Korrapati Date: Mon, 4 Mar 2019 15:13:19 -0800 Subject: [PATCH 1/3] Upgraded TaxiFare solution to v0.11 --- .../tutorials/TaxiFarePrediction/Program.cs | 39 +++++-------------- .../TaxiFarePrediction.csproj | 2 +- .../tutorials/TaxiFarePrediction/TaxiTrip.cs | 24 ++++++------ 3 files changed, 22 insertions(+), 43 deletions(-) diff --git a/machine-learning/tutorials/TaxiFarePrediction/Program.cs b/machine-learning/tutorials/TaxiFarePrediction/Program.cs index 7dd49525042..3d80b6f91da 100644 --- a/machine-learning/tutorials/TaxiFarePrediction/Program.cs +++ b/machine-learning/tutorials/TaxiFarePrediction/Program.cs @@ -3,12 +3,7 @@ using System.IO; using Microsoft.Data.DataView; using Microsoft.ML; -using Microsoft.ML.Core.Data; using Microsoft.ML.Data; -using Microsoft.ML.Transforms; -using Microsoft.ML.Transforms.Categorical; -using Microsoft.ML.Transforms.Normalizers; -using Microsoft.ML.Transforms.Text; // namespace TaxiFarePrediction @@ -19,7 +14,6 @@ class Program static readonly string _trainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "taxi-fare-train.csv"); static readonly string _testDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "taxi-fare-test.csv"); static readonly string _modelPath = Path.Combine(Environment.CurrentDirectory, "Data", "Model.zip"); - static TextLoader _textLoader; // static void Main(string[] args) @@ -31,22 +25,7 @@ static void Main(string[] args) // // - _textLoader = mlContext.Data.CreateTextLoader(new TextLoader.Arguments() - { - Separators = new[] { ',' }, - HasHeader = true, - Column = new[] - { - new TextLoader.Column("VendorId", DataKind.Text, 0), - new TextLoader.Column("RateCode", DataKind.Text, 1), - new TextLoader.Column("PassengerCount", DataKind.R4, 2), - new TextLoader.Column("TripTime", DataKind.R4, 3), - new TextLoader.Column("TripDistance", DataKind.R4, 4), - new TextLoader.Column("PaymentType", 
DataKind.Text, 5), - new TextLoader.Column("FareAmount", DataKind.R4, 6) - } - } - ); + //This code has been removed in v0.11 of ML.NET. // // @@ -65,19 +44,19 @@ static void Main(string[] args) public static ITransformer Train(MLContext mlContext, string dataPath) { // - IDataView dataView = _textLoader.Read(dataPath); + IDataView dataView = mlContext.Data.LoadFromTextFile(dataPath, hasHeader: true, separatorChar: ','); // // - var pipeline = mlContext.Transforms.CopyColumns(inputColumnName:"FareAmount", outputColumnName:"Label") - // + var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "FareAmount") + // // - .Append(mlContext.Transforms.Categorical.OneHotEncoding("VendorId")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("RateCode")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("PaymentType")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "VendorIdEncoded", inputColumnName: "VendorId")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "RateCodeEncoded", inputColumnName: "RateCode")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "PaymentTypeEncoded", inputColumnName: "PaymentType")) // // - .Append(mlContext.Transforms.Concatenate("Features", "VendorId", "RateCode", "PassengerCount", "TripTime", "TripDistance", "PaymentType")) + .Append(mlContext.Transforms.Concatenate("Features", "VendorIdEncoded", "RateCodeEncoded", "PassengerCount", "TripTime", "TripDistance", "PaymentTypeEncoded")) // // .Append(mlContext.Regression.Trainers.FastTree()); @@ -101,7 +80,7 @@ public static ITransformer Train(MLContext mlContext, string dataPath) private static void Evaluate(MLContext mlContext, ITransformer model) { // - IDataView dataView = _textLoader.Read(_testDataPath); + IDataView dataView = mlContext.Data.LoadFromTextFile(_testDataPath, hasHeader: true, separatorChar: ','); // // diff --git
a/machine-learning/tutorials/TaxiFarePrediction/TaxiFarePrediction.csproj b/machine-learning/tutorials/TaxiFarePrediction/TaxiFarePrediction.csproj index 2157d2fa5d8..66b5be7ccd6 100644 --- a/machine-learning/tutorials/TaxiFarePrediction/TaxiFarePrediction.csproj +++ b/machine-learning/tutorials/TaxiFarePrediction/TaxiFarePrediction.csproj @@ -10,7 +10,7 @@ - + diff --git a/machine-learning/tutorials/TaxiFarePrediction/TaxiTrip.cs b/machine-learning/tutorials/TaxiFarePrediction/TaxiTrip.cs index 13c059572b5..15e210fc1f5 100644 --- a/machine-learning/tutorials/TaxiFarePrediction/TaxiTrip.cs +++ b/machine-learning/tutorials/TaxiFarePrediction/TaxiTrip.cs @@ -7,25 +7,25 @@ namespace TaxiFarePrediction // public class TaxiTrip { - [Column("0")] + [LoadColumn(0)] public string VendorId; - [Column("1")] + [LoadColumn(1)] public string RateCode; - - [Column("2")] + + [LoadColumn(2)] public float PassengerCount; - - [Column("3")] + + [LoadColumn(3)] public float TripTime; - - [Column("4")] + + [LoadColumn(4)] public float TripDistance; - - [Column("5")] + + [LoadColumn(5)] public string PaymentType; - - [Column("6")] + + [LoadColumn(6)] public float FareAmount; } From 70167ee39972a41e5b86f271671191487f8874d6 Mon Sep 17 00:00:00 2001 From: Prathyusha Korrapati Date: Mon, 22 Apr 2019 16:39:53 -0700 Subject: [PATCH 2/3] Migrated Github issues to Ml.Net 1.0.0-preview. 
--- .../GitHubIssueClassification.csproj | 2 +- .../GitHubIssueClassification/Program.cs | 30 +++++++------------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/machine-learning/tutorials/GitHubIssueClassification/GitHubIssueClassification.csproj b/machine-learning/tutorials/GitHubIssueClassification/GitHubIssueClassification.csproj index 9f0fdc281c3..904dbb36461 100644 --- a/machine-learning/tutorials/GitHubIssueClassification/GitHubIssueClassification.csproj +++ b/machine-learning/tutorials/GitHubIssueClassification/GitHubIssueClassification.csproj @@ -15,7 +15,7 @@ - + diff --git a/machine-learning/tutorials/GitHubIssueClassification/Program.cs b/machine-learning/tutorials/GitHubIssueClassification/Program.cs index 6cfd5b8a3fc..90cb329a40d 100644 --- a/machine-learning/tutorials/GitHubIssueClassification/Program.cs +++ b/machine-learning/tutorials/GitHubIssueClassification/Program.cs @@ -2,10 +2,7 @@ using System; using System.IO; using System.Linq; -using Microsoft.Data.DataView; using Microsoft.ML; -using Microsoft.ML.Data; -using Microsoft.ML.Transforms; // namespace GitHubIssueClassification @@ -55,7 +52,7 @@ static void Main(string[] args) // // - Evaluate(); + Evaluate(_trainingDataView.Schema); // // @@ -95,7 +92,7 @@ public static IEstimator BuildAndTrainModel(IDataView trainingData // Use the multi-class SDCA algorithm to predict the label using features. 
//Set the trainer/algorithm and map label to value (original readable state) // - var trainingPipeline = pipeline.Append(_mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(DefaultColumnNames.Label, DefaultColumnNames.Features)) + var trainingPipeline = pipeline.Append(_mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features")) .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); // @@ -112,7 +109,7 @@ public static IEstimator BuildAndTrainModel(IDataView trainingData // Create prediction engine related to the loaded trained model // - _predEngine = _trainedModel.CreatePredictionEngine(_mlContext); + _predEngine = _mlContext.Model.CreatePredictionEngine(_trainedModel); // // GitHubIssue issue = new GitHubIssue() { @@ -135,7 +132,7 @@ public static IEstimator BuildAndTrainModel(IDataView trainingData } - public static void Evaluate() + public static void Evaluate(DataViewSchema trainigDataViewSchema) { // STEP 5: Evaluate the model in order to get the model's accuracy metrics Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Starting time: {DateTime.Now.ToString()} ==============="); @@ -155,8 +152,8 @@ public static void Evaluate() Console.WriteLine($"*************************************************************************************************************"); Console.WriteLine($"* Metrics for Multi-class Classification model - Test Data "); Console.WriteLine($"*------------------------------------------------------------------------------------------------------------"); - Console.WriteLine($"* MicroAccuracy: {testMetrics.AccuracyMicro:0.###}"); - Console.WriteLine($"* MacroAccuracy: {testMetrics.AccuracyMacro:0.###}"); + Console.WriteLine($"* MicroAccuracy: {testMetrics.MicroAccuracy:0.###}"); + Console.WriteLine($"* MacroAccuracy: {testMetrics.MacroAccuracy:0.###}"); Console.WriteLine($"* LogLoss: {testMetrics.LogLoss:#.###}"); Console.WriteLine($"* 
LogLossReduction: {testMetrics.LogLossReduction:#.###}"); Console.WriteLine($"*************************************************************************************************************"); @@ -164,7 +161,7 @@ public static void Evaluate() // Save the new model to .ZIP file // - SaveModelAsFile(_mlContext, _trainedModel); + SaveModelAsFile(_mlContext, trainigDataViewSchema, _trainedModel); // } @@ -172,11 +169,7 @@ public static void Evaluate() public static void PredictIssue() { // - ITransformer loadedModel; - using (var stream = new FileStream(_modelPath, FileMode.Open, FileAccess.Read, FileShare.Read)) - { - loadedModel = _mlContext.Model.Load(stream); - } + ITransformer loadedModel = _mlContext.Model.Load(_modelPath, out var modelInputSchema); // // @@ -185,7 +178,7 @@ public static void PredictIssue() //Predict label for single hard-coded issue // - _predEngine = loadedModel.CreatePredictionEngine(_mlContext); + _predEngine = _mlContext.Model.CreatePredictionEngine(loadedModel); // // @@ -198,11 +191,10 @@ public static void PredictIssue() } - private static void SaveModelAsFile(MLContext mlContext, ITransformer model) + private static void SaveModelAsFile(MLContext mlContext,DataViewSchema trainigDataViewSchema, ITransformer model) { // - using (var fs = new FileStream(_modelPath, FileMode.Create, FileAccess.Write, FileShare.Write)) - mlContext.Model.Save(model, fs); + mlContext.Model.Save(model, trainigDataViewSchema, _modelPath); // Console.WriteLine("The model is saved to {0}", _modelPath); From 3ea1d5d21f7a120e0fa73a204d5ec4675920b791 Mon Sep 17 00:00:00 2001 From: Prathyusha Korrapati Date: Wed, 24 Apr 2019 16:29:28 -0700 Subject: [PATCH 3/3] Fixed spelling --- .../tutorials/GitHubIssueClassification/Program.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/machine-learning/tutorials/GitHubIssueClassification/Program.cs b/machine-learning/tutorials/GitHubIssueClassification/Program.cs index 90cb329a40d..c06e57da286 100644 --- 
a/machine-learning/tutorials/GitHubIssueClassification/Program.cs +++ b/machine-learning/tutorials/GitHubIssueClassification/Program.cs @@ -132,7 +132,7 @@ public static IEstimator BuildAndTrainModel(IDataView trainingData } - public static void Evaluate(DataViewSchema trainigDataViewSchema) + public static void Evaluate(DataViewSchema trainingDataViewSchema) { // STEP 5: Evaluate the model in order to get the model's accuracy metrics Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Starting time: {DateTime.Now.ToString()} ==============="); @@ -161,7 +161,7 @@ public static void Evaluate(DataViewSchema trainigDataViewSchema) // Save the new model to .ZIP file // - SaveModelAsFile(_mlContext, trainigDataViewSchema, _trainedModel); + SaveModelAsFile(_mlContext, trainingDataViewSchema, _trainedModel); // } @@ -191,10 +191,10 @@ public static void PredictIssue() } - private static void SaveModelAsFile(MLContext mlContext,DataViewSchema trainigDataViewSchema, ITransformer model) + private static void SaveModelAsFile(MLContext mlContext,DataViewSchema trainingDataViewSchema, ITransformer model) { // - mlContext.Model.Save(model, trainigDataViewSchema, _modelPath); + mlContext.Model.Save(model, trainingDataViewSchema, _modelPath); // Console.WriteLine("The model is saved to {0}", _modelPath);