From bfbb172c664e1dedb76ed67b2b199245b4194b31 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 9 Apr 2019 12:51:18 -0700 Subject: [PATCH 1/3] Updated sample for Concatenate API. --- .../Dynamic/Transforms/Concatenate.cs | 70 +++++++++++-------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs index 22f6b7e321..7c34ebc035 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs @@ -1,9 +1,11 @@ using System; +using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { - public static class ConcatTransform + public static class Concatenate { public static void Example() { @@ -11,37 +13,41 @@ public static void Example() // as well as the source of randomness. var mlContext = new MLContext(); - // Get a small dataset as an IEnumerable and them read it as ML.NET's data type. - var data = SamplesUtils.DatasetUtils.GetInfertData(); - var trainData = mlContext.Data.LoadFromEnumerable(data); + // Create a small dataset as an IEnumerable. + var samples = new List<InputData>() + { + new InputData(){ Feature1 = 0.1f, Feature2 = new[]{ 1.1f, 2.1f, 3.1f}, Feature3 = 1 }, + new InputData(){ Feature1 = 0.2f, Feature2 = new[]{ 1.2f, 2.2f, 3.2f}, Feature3 = 2 }, + new InputData(){ Feature1 = 0.3f, Feature2 = new[]{ 1.3f, 2.3f, 3.3f}, Feature3 = 3 }, + new InputData(){ Feature1 = 0.4f, Feature2 = new[]{ 1.4f, 2.4f, 3.4f}, Feature3 = 4 }, + new InputData(){ Feature1 = 0.5f, Feature2 = new[]{ 1.5f, 2.5f, 3.5f}, Feature3 = 5 }, + new InputData(){ Feature1 = 0.6f, Feature2 = new[]{ 1.6f, 2.6f, 3.6f}, Feature3 = 6 }, + }; - // Preview of the data. - // - // Age Case Education induced parity pooled.stratum row_num ... - // 26.0 1.0 0-5yrs 1.0 6.0 3.0 1.0 ... 
- // 42.0 1.0 0-5yrs 1.0 1.0 1.0 2.0 ... - // 39.0 1.0 0-5yrs 2.0 6.0 4.0 3.0 ... - // 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ... - // 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ... + // Convert training data to IDataView. + var dataview = mlContext.Data.LoadFromEnumerable(samples); - // A pipeline for concatenating the Age, Parity and Induced columns together into a vector that will be the Features column. + // A pipeline for concatenating the "Feature1", "Feature2" and "Feature3" columns together into a vector that will be the Features column. // Concatenation is necessary because learners take **feature vectors** as inputs. // e.g. var regressionTrainer = mlContext.Regression.Trainers.FastTree(labelColumn: "Label", featureColumn: "Features"); - string outputColumnName = "Features"; - var pipeline = mlContext.Transforms.Concatenate(outputColumnName, new[] { "Age", "Parity", "Induced" }); + // + // Please note that the "Feature3" column is converted from int32 to float using the ConvertType API. + // The Concatenate API requires all the columns to be of same type. + var pipeline = mlContext.Transforms.Conversion.ConvertType("Feature4", "Feature3", outputKind: DataKind.Single) + .Append(mlContext.Transforms.Concatenate("Features", new[] { "Feature1", "Feature2", "Feature4" })); // The transformed data. - var transformedData = pipeline.Fit(trainData).Transform(trainData); + var transformedData = pipeline.Fit(dataview).Transform(dataview); // Now let's take a look at what this concatenation did. - // We can extract the newly created column as an IEnumerable of SampleInfertDataWithFeatures, the class we define above. - var featuresColumn = mlContext.Data.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false); + // We can extract the newly created column as an IEnumerable of TransformedData. 
+ var featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false); // And we can write out a few rows - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine($"Features column obtained post-transformation."); foreach (var featureRow in featuresColumn) { - foreach (var value in featureRow.Features.GetValues()) + foreach (var value in featureRow.Features) Console.Write($"{value} "); Console.WriteLine(""); } @@ -49,16 +55,24 @@ public static void Example() // Expected output: // Features column obtained post-transformation. // - // 26 6 1 - // 42 1 1 - // 39 6 2 - // 34 4 2 - // 35 3 1 + // 0.1 1.1 2.1 3.1 1 + // 0.2 1.2 2.2 3.2 2 + // 0.3 1.3 2.3 3.3 3 + // 0.4 1.4 2.4 3.4 4 + // 0.5 1.5 2.5 3.5 5 + // 0.6 1.6 2.6 3.6 6 + } + + private class InputData + { + public float Feature1; + public float[] Feature2; + public int Feature3; } - private class SampleInfertDataWithFeatures + private sealed class TransformedData : InputData { - public VBuffer<float> Features { get; set; } + public float[] Features { get; set; } } } } From fe1adfcfbc542f812f41788ad53da0fa00289671 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 9 Apr 2019 17:00:38 -0700 Subject: [PATCH 2/3] Addressed reviewers' comments. --- .../Dynamic/Transforms/Concatenate.cs | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs index 7c34ebc035..421d83632f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs @@ -29,12 +29,11 @@ public static void Example() // A pipeline for concatenating the "Feature1", "Feature2" and "Feature3" columns together into a vector that will be the Features column. 
// Concatenation is necessary because learners take **feature vectors** as inputs. - // e.g. var regressionTrainer = mlContext.Regression.Trainers.FastTree(labelColumn: "Label", featureColumn: "Features"); // // Please note that the "Feature3" column is converted from int32 to float using the ConvertType API. - // The Concatenate API requires all the columns to be of same type. - var pipeline = mlContext.Transforms.Conversion.ConvertType("Feature4", "Feature3", outputKind: DataKind.Single) - .Append(mlContext.Transforms.Concatenate("Features", new[] { "Feature1", "Feature2", "Feature4" })); + // The Concatenate API requires all columns to be of same type. + var pipeline = mlContext.Transforms.Conversion.ConvertType("Feature3", outputKind: DataKind.Single) + .Append(mlContext.Transforms.Concatenate("Features", new[] { "Feature1", "Feature2", "Feature3" })); // The transformed data. var transformedData = pipeline.Fit(dataview).Transform(dataview); @@ -46,31 +45,27 @@ public static void Example() // And we can write out a few rows Console.WriteLine($"Features column obtained post-transformation."); foreach (var featureRow in featuresColumn) - { - foreach (var value in featureRow.Features) - Console.Write($"{value} "); - Console.WriteLine(""); - } + Console.WriteLine(string.Join(" ", featureRow.Features)); // Expected output: - // Features column obtained post-transformation. - // - // 0.1 1.1 2.1 3.1 1 - // 0.2 1.2 2.2 3.2 2 - // 0.3 1.3 2.3 3.3 3 - // 0.4 1.4 2.4 3.4 4 - // 0.5 1.5 2.5 3.5 5 - // 0.6 1.6 2.6 3.6 6 + // Features column obtained post-transformation. 
+ // 0.1 1.1 2.1 3.1 1 + // 0.2 1.2 2.2 3.2 2 + // 0.3 1.3 2.3 3.3 3 + // 0.4 1.4 2.4 3.4 4 + // 0.5 1.5 2.5 3.5 5 + // 0.6 1.6 2.6 3.6 6 } private class InputData { public float Feature1; + [VectorType(3)] public float[] Feature2; public int Feature3; } - private sealed class TransformedData : InputData + private sealed class TransformedData { public float[] Features { get; set; } } From 448919b01d8c9fa9df7bf45749a96781297ee4f1 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Wed, 10 Apr 2019 12:46:48 -0700 Subject: [PATCH 3/3] Addressed reviewers' comments. --- .../Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs index 421d83632f..9b350227be 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs @@ -28,10 +28,10 @@ public static void Example() var dataview = mlContext.Data.LoadFromEnumerable(samples); // A pipeline for concatenating the "Feature1", "Feature2" and "Feature3" columns together into a vector that will be the Features column. - // Concatenation is necessary because learners take **feature vectors** as inputs. + // Concatenation is necessary because trainers take feature vectors as inputs. // - // Please note that the "Feature3" column is converted from int32 to float using the ConvertType API. - // The Concatenate API requires all columns to be of same type. + // Please note that the "Feature3" column is converted from int32 to float using the ConvertType. + // The Concatenate requires all columns to be of same type. var pipeline = mlContext.Transforms.Conversion.ConvertType("Feature3", outputKind: DataKind.Single) .Append(mlContext.Transforms.Concatenate("Features", new[] { "Feature1", "Feature2", "Feature3" }));