From 29ce82b556be41154fdc4409193211119d7e703d Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Mon, 23 Jul 2018 15:42:53 -0700 Subject: [PATCH 1/4] pass fold index to metrics. --- .../Models/BinaryClassificationMetrics.cs | 11 +++++++++++ src/Microsoft.ML/Models/ClassificationMetrics.cs | 10 ++++++++++ src/Microsoft.ML/Models/ClusterMetrics.cs | 11 +++++++++++ src/Microsoft.ML/Models/RegressionMetrics.cs | 11 +++++++++++ 4 files changed, 43 insertions(+) diff --git a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs index f536f30ed0..0fa4823f1c 100644 --- a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs @@ -7,6 +7,7 @@ using Microsoft.ML.Runtime.Data; using System; using System.Collections.Generic; +using static Microsoft.ML.Runtime.Data.MetricKinds; namespace Microsoft.ML.Models { @@ -57,6 +58,7 @@ internal static List FromMetrics(IHostEnvironment e Entropy = metric.Entropy, F1Score = metric.F1Score, Auprc = metric.Auprc, + FoldIndex = metric.FoldIndex, ConfusionMatrix = confusionMatrices.Current, }); @@ -162,6 +164,12 @@ internal static List FromMetrics(IHostEnvironment e /// public ConfusionMatrix ConfusionMatrix { get; private set; } + /// + /// For cross validation gives fold number or standard deviation or average across all metrics. + /// In other cases equal to null. + /// + public string FoldIndex { get; private set; } + /// /// This class contains the public fields necessary to deserialize from IDataView. 
/// @@ -200,6 +208,9 @@ private sealed class SerializationClass [ColumnName(BinaryClassifierEvaluator.AuPrc)] public Double Auprc; + + [ColumnName(ColumnNames.FoldIndex)] + public string FoldIndex; #pragma warning restore 649 // never assigned } } diff --git a/src/Microsoft.ML/Models/ClassificationMetrics.cs b/src/Microsoft.ML/Models/ClassificationMetrics.cs index f3a2416bca..e39a8f760a 100644 --- a/src/Microsoft.ML/Models/ClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/ClassificationMetrics.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using System.Collections.Generic; +using static Microsoft.ML.Runtime.Data.MetricKinds; namespace Microsoft.ML.Models { @@ -127,6 +128,12 @@ internal static List FromMetrics(IHostEnvironment env, ID /// public double[] PerClassLogLoss { get; private set; } + /// + /// For cross validation gives fold number or standard deviation or average across all metrics. + /// In other cases equal to null. + /// + public string FoldIndex { get; private set; } + /// /// Gets the confusion matrix, or error matrix, of the classifier. 
/// @@ -155,6 +162,9 @@ private sealed class SerializationClass [ColumnName(MultiClassClassifierEvaluator.PerClassLogLoss)] public double[] PerClassLogLoss; + + [ColumnName(ColumnNames.FoldIndex)] + public string FoldIndex; #pragma warning restore 649 // never assigned } } diff --git a/src/Microsoft.ML/Models/ClusterMetrics.cs b/src/Microsoft.ML/Models/ClusterMetrics.cs index 7f88784ef8..a36e7cc893 100644 --- a/src/Microsoft.ML/Models/ClusterMetrics.cs +++ b/src/Microsoft.ML/Models/ClusterMetrics.cs @@ -7,6 +7,7 @@ using Microsoft.ML.Runtime.Data; using System; using System.Collections.Generic; +using static Microsoft.ML.Runtime.Data.MetricKinds; namespace Microsoft.ML.Models { @@ -73,6 +74,12 @@ internal static List FromOverallMetrics(IHostEnvironment env, ID /// public double AvgMinScore { get; private set; } + /// + /// For cross validation gives fold number or standard deviation or average across all metrics. + /// In other cases equal to null. + /// + public string FoldIndex { get; private set; } + /// /// This class contains the public fields necessary to deserialize from IDataView. 
/// @@ -88,6 +95,10 @@ private sealed class SerializationClass [ColumnName(Runtime.Data.ClusteringEvaluator.AvgMinScore)] public Double AvgMinScore; + [ColumnName(ColumnNames.FoldIndex)] + public string FoldIndex; + + #pragma warning restore 649 // never assigned } } diff --git a/src/Microsoft.ML/Models/RegressionMetrics.cs b/src/Microsoft.ML/Models/RegressionMetrics.cs index 64500f5e6c..0bbd81a270 100644 --- a/src/Microsoft.ML/Models/RegressionMetrics.cs +++ b/src/Microsoft.ML/Models/RegressionMetrics.cs @@ -7,6 +7,7 @@ using Microsoft.ML.Runtime.Data; using System; using System.Collections.Generic; +using static Microsoft.ML.Runtime.Data.MetricKinds; namespace Microsoft.ML.Models { @@ -40,6 +41,7 @@ internal static List FromOverallMetrics(IHostEnvironment env, Rms = metric.Rms, LossFn = metric.LossFn, RSquared = metric.RSquared, + FoldIndex = metric.FoldIndex, }); } @@ -90,6 +92,12 @@ internal static List FromOverallMetrics(IHostEnvironment env, /// public double RSquared { get; private set; } + /// + /// For cross validation gives fold number or standard deviation or average across all metrics. + /// In other cases equal to null. + /// + public string FoldIndex { get; private set; } + /// /// This class contains the public fields necessary to deserialize from IDataView. 
/// @@ -110,6 +118,9 @@ private sealed class SerializationClass [ColumnName(Runtime.Data.RegressionEvaluator.RSquared)] public Double RSquared; + + [ColumnName(ColumnNames.FoldIndex)] + public string FoldIndex; #pragma warning restore 649 // never assigned } } From c083812a4ea5e627680e93e8f6d764ea673b2257 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 25 Jul 2018 12:54:01 -0700 Subject: [PATCH 2/4] address Pete comments --- .../Models/BinaryClassificationMetrics.cs | 12 ++++++------ src/Microsoft.ML/Models/ClassificationMetrics.cs | 11 ++++++----- src/Microsoft.ML/Models/ClusterMetrics.cs | 11 +++++------ src/Microsoft.ML/Models/RegressionMetrics.cs | 10 +++++----- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs index 0fa4823f1c..40cb93f114 100644 --- a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs @@ -36,7 +36,7 @@ internal static List FromMetrics(IHostEnvironment e var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); int Index = 0; - foreach(var metric in metricsEnumerable) + foreach (var metric in metricsEnumerable) { if (Index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) @@ -58,7 +58,7 @@ internal static List FromMetrics(IHostEnvironment e Entropy = metric.Entropy, F1Score = metric.F1Score, Auprc = metric.Auprc, - FoldIndex = metric.FoldIndex, + RowTag = metric.RowTag, ConfusionMatrix = confusionMatrices.Current, }); @@ -165,10 +165,10 @@ internal static List FromMetrics(IHostEnvironment e public ConfusionMatrix ConfusionMatrix { get; private set; } /// - /// For cross validation gives fold number or standard deviation or average across all metrics. - /// In other cases equal to null. 
+ /// For cross-validation, this is equal to "Fold N" for per-fold metric rows, "Overall" for the average metrics and "STD" for standard deviation. + /// For non-CV scenarios, this is equal to null. /// - public string FoldIndex { get; private set; } + public string RowTag { get; private set; } /// /// This class contains the public fields necessary to deserialize from IDataView. @@ -210,7 +210,7 @@ private sealed class SerializationClass public Double Auprc; [ColumnName(ColumnNames.FoldIndex)] - public string FoldIndex; + public string RowTag; #pragma warning restore 649 // never assigned } } diff --git a/src/Microsoft.ML/Models/ClassificationMetrics.cs b/src/Microsoft.ML/Models/ClassificationMetrics.cs index e39a8f760a..189a9b1ebc 100644 --- a/src/Microsoft.ML/Models/ClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/ClassificationMetrics.cs @@ -52,7 +52,8 @@ internal static List FromMetrics(IHostEnvironment env, ID LogLossReduction = metric.LogLossReduction, TopKAccuracy = metric.TopKAccuracy, PerClassLogLoss = metric.PerClassLogLoss, - ConfusionMatrix = confusionMatrices.Current + ConfusionMatrix = confusionMatrices.Current, + RowTag =metric.RowTag, }); } @@ -129,10 +130,10 @@ internal static List FromMetrics(IHostEnvironment env, ID public double[] PerClassLogLoss { get; private set; } /// - /// For cross validation gives fold number or standard deviation or average across all metrics. - /// In other cases equal to null. + /// For cross-validation, this is equal to "Fold N" for per-fold metric rows, "Overall" for the average metrics and "STD" for standard deviation. + /// For non-CV scenarios, this is equal to null. /// - public string FoldIndex { get; private set; } + public string RowTag { get; private set; } /// /// Gets the confusion matrix, or error matrix, of the classifier. 
@@ -164,7 +165,7 @@ private sealed class SerializationClass public double[] PerClassLogLoss; [ColumnName(ColumnNames.FoldIndex)] - public string FoldIndex; + public string RowTag; #pragma warning restore 649 // never assigned } } diff --git a/src/Microsoft.ML/Models/ClusterMetrics.cs b/src/Microsoft.ML/Models/ClusterMetrics.cs index a36e7cc893..13873ed226 100644 --- a/src/Microsoft.ML/Models/ClusterMetrics.cs +++ b/src/Microsoft.ML/Models/ClusterMetrics.cs @@ -39,6 +39,7 @@ internal static List FromOverallMetrics(IHostEnvironment env, ID AvgMinScore = metric.AvgMinScore, Nmi = metric.Nmi, Dbi = metric.Dbi, + RowTag = metric.RowTag, }); } @@ -75,10 +76,10 @@ internal static List FromOverallMetrics(IHostEnvironment env, ID public double AvgMinScore { get; private set; } /// - /// For cross validation gives fold number or standard deviation or average across all metrics. - /// In other cases equal to null. + /// For cross-validation, this is equal to "Fold N" for per-fold metric rows, "Overall" for the average metrics and "STD" for standard deviation. + /// For non-CV scenarios, this is equal to null. /// - public string FoldIndex { get; private set; } + public string RowTag { get; private set; } /// /// This class contains the public fields necessary to deserialize from IDataView. 
@@ -96,9 +97,7 @@ private sealed class SerializationClass public Double AvgMinScore; [ColumnName(ColumnNames.FoldIndex)] - public string FoldIndex; - - + public string RowTag; #pragma warning restore 649 // never assigned } } diff --git a/src/Microsoft.ML/Models/RegressionMetrics.cs b/src/Microsoft.ML/Models/RegressionMetrics.cs index 0bbd81a270..bf5ba625f6 100644 --- a/src/Microsoft.ML/Models/RegressionMetrics.cs +++ b/src/Microsoft.ML/Models/RegressionMetrics.cs @@ -41,7 +41,7 @@ internal static List FromOverallMetrics(IHostEnvironment env, Rms = metric.Rms, LossFn = metric.LossFn, RSquared = metric.RSquared, - FoldIndex = metric.FoldIndex, + RowTag = metric.RowTag, }); } @@ -93,10 +93,10 @@ internal static List FromOverallMetrics(IHostEnvironment env, public double RSquared { get; private set; } /// - /// For cross validation gives fold number or standard deviation or average across all metrics. - /// In other cases equal to null. + /// For cross-validation, this is equal to "Fold N" for per-fold metric rows, "Overall" for the average metrics and "STD" for standard deviation. + /// For non-CV scenarios, this is equal to null. /// - public string FoldIndex { get; private set; } + public string RowTag { get; private set; } /// /// This class contains the public fields necessary to deserialize from IDataView. 
@@ -120,7 +120,7 @@ private sealed class SerializationClass public Double RSquared; [ColumnName(ColumnNames.FoldIndex)] - public string FoldIndex; + public string RowTag; #pragma warning restore 649 // never assigned } } From 73baf4c83648178db3876648f656b3f9cf6871ff Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Thu, 26 Jul 2018 10:37:27 -0700 Subject: [PATCH 3/4] merge with master --- src/Microsoft.ML/Models/BinaryClassificationMetrics.cs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs index 5a0ea0d128..2dc538259f 100644 --- a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs @@ -35,13 +35,8 @@ internal static List FromMetrics(IHostEnvironment e List metrics = new List(); var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); -<<<<<<< HEAD int index = 0; foreach(var metric in metricsEnumerable) -======= - int Index = 0; - foreach (var metric in metricsEnumerable) ->>>>>>> c083812a4ea5e627680e93e8f6d764ea673b2257 { if (index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) From e1f76987b46be513e4262008617675bc21920121 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Thu, 26 Jul 2018 10:39:46 -0700 Subject: [PATCH 4/4] formatting --- src/Microsoft.ML/Models/BinaryClassificationMetrics.cs | 2 +- src/Microsoft.ML/Models/ClassificationMetrics.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs index 2dc538259f..1714265674 100644 --- a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs @@ -36,7 +36,7 @@ internal static List FromMetrics(IHostEnvironment e var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); 
int index = 0; - foreach(var metric in metricsEnumerable) + foreach (var metric in metricsEnumerable) { if (index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) diff --git a/src/Microsoft.ML/Models/ClassificationMetrics.cs b/src/Microsoft.ML/Models/ClassificationMetrics.cs index 0a5d9cb6ea..6c1c139278 100644 --- a/src/Microsoft.ML/Models/ClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/ClassificationMetrics.cs @@ -53,7 +53,7 @@ internal static List FromMetrics(IHostEnvironment env, ID TopKAccuracy = metric.TopKAccuracy, PerClassLogLoss = metric.PerClassLogLoss, ConfusionMatrix = confusionMatrices.Current, - RowTag =metric.RowTag, + RowTag = metric.RowTag, }); }