From fce8ef40c494266fcd2a404ff4a6c3d93336d84e Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Wed, 10 Apr 2019 13:39:36 -0700
Subject: [PATCH 1/8] Adding Regression GAM samples.

---
 .../Dynamic/Trainers/Regression/Gam.cs        | 189 +++++++++++-------
 .../Trainers/Regression/GamWithOptions.cs     | 180 +++++++++++------
 2 files changed, 233 insertions(+), 136 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
index dd8107452a..be545c86dc 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
@@ -1,7 +1,7 @@
 ﻿using System;
-using System.Linq;
+using System.Collections.Generic;
 using Microsoft.ML;
-using Microsoft.ML.SamplesUtils;
+using Microsoft.ML.Data;
 
 namespace Samples.Dynamic.Trainers.Regression
 {
@@ -15,34 +15,32 @@ public static void Example()
             // as a catalog of available operations and as the source of randomness.
             var mlContext = new MLContext();
             
-            // Read the Housing regression dataset
-            var data = DatasetUtils.LoadHousingRegressionDataset(mlContext);
-
-            var labelName = "MedianHomeValue";
-            var featureNames = data.Schema
-                .Select(column => column.Name) // Get the column names
-                .Where(name => name != labelName) // Drop the Label
-                .ToArray();
-
-            // Create a pipeline.
-            var pipeline =
-                // Concatenate the features to create a Feature vector.
-                mlContext.Transforms.Concatenate("Features", featureNames)
-                // Append a GAM regression trainer, setting the "MedianHomeValue" column as the label of the dataset,
-                // the "Features" column produced by concatenation as the features column,
-                // and use a small number of bins to make it easy to visualize in the console window.
-                // For real applications, it is recommended to start with the default number of bins.
-                .Append(mlContext.Regression.Trainers.Gam(labelColumnName: labelName, featureColumnName: "Features", maximumBinCountPerFeature: 16));
-
-            // Train the pipeline.
-            var trainedPipeline = pipeline.Fit(data);
-
-            // Extract the model from the pipeline.
-            var gamModel = trainedPipeline.LastTransformer.Model;
+            // Create training and validation datasets.
+            var samples = GenerateData();
+
+            // Convert the dataset to an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Split the data into a training set and a validation set.
+            var dataSets = mlContext.Data.TrainTestSplit(data);
+            var trainSet = dataSets.TrainSet;
+            var validSet = dataSets.TestSet;
+
+            // Create a GAM trainer.
+            // Use a small number of bins for this example.
+            var trainer = mlContext.Regression.Trainers.Gam(maximumBinCountPerFeature: 16);
+
+            // Fit the model to the data using a validation set.
+            // GAM will use a technique called validation pruning to tune the model after training
+            // to improve generalization.
+            var model = trainer.Fit(trainSet, validSet);
+            
+            // Extract the model parameters.
+            var gam = model.Model;
 
             // Now investigate the bias and shape functions of the GAM model.
             // The bias represents the average prediction for the training data.
-            Console.WriteLine($"Average predicted cost: {gamModel.Bias:0.00}");
+            Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
             // Expected output:
             //   Average predicted cost: 22.53
@@ -51,56 +49,105 @@ public static void Example()
             // one response per feature. Unlike a linear model, this response is a function instead of a line.
             // Each feature response represents the deviation from the average prediction as a function of the 
             // feature value.
+            for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
+            {
+                // Break a line.
+                Console.WriteLine();
+
+                // Get the bin upper bounds for the feature.
+                var binUpperBounds = gam.GetBinUpperBounds(i);
+                // Get the bin effects; these are the effect size for each bin.
+                var binEffects = gam.GetBinEffects(i);
 
-            // Let's investigate the TeacherRatio variable. This is the ratio of students to teachers,
-            // so the higher it is, the more students a teacher has in their classroom.
-            // First, let's get the index of the variable we want to look at.
-            var studentTeacherRatioIndex = featureNames.ToList().FindIndex(str => str.Equals("TeacherRatio"));
-
-            // Next, let's get the array of histogram bin upper bounds from the model for this feature.
-            // For each feature, the shape function is calculated at `MaxBins` locations along the range of 
-            // values that the feature takes, and the resulting shape function can be seen as a histogram of
-            // effects.
-            var teacherRatioBinUpperBounds = gamModel.GetBinUpperBounds(studentTeacherRatioIndex);
-            // And the array of bin effects; these are the effect size for each bin.
-            var teacherRatioBinEffects = gamModel.GetBinEffects(studentTeacherRatioIndex);
-
-            // Now, write the function to the console. The function is a set of bins, and the corresponding
-            // function values. You can think of GAMs as building a bar-chart lookup table.
-            Console.WriteLine("Student-Teacher Ratio");
-            for (int i = 0; i < teacherRatioBinUpperBounds.Count; i++)
-                Console.WriteLine($"x < {teacherRatioBinUpperBounds[i]:0.00} => {teacherRatioBinEffects[i]:0.000}");
-
-            //  Expected output:
-            //    Student-Teacher Ratio
-            //    x < 14.55 =>  2.105
-            //    x < 14.75 =>  2.326
-            //    x < 15.40 =>  0.903
-            //    x < 16.50 =>  0.651
-            //    x < 17.15 =>  0.587
-            //    x < 17.70 =>  0.624
-            //    x < 17.85 =>  0.684
-            //    x < 18.35 => -0.315
-            //    x < 18.55 => -0.542
-            //    x < 18.75 => -0.083
-            //    x < 19.40 => -0.442
-            //    x < 20.55 => -0.649
-            //    x < 21.05 => -1.579
-            //    x <   ∞   =>  0.318
+                // Now, write the function to the console. The function is a set of bins, and the corresponding
+                // function values. You can think of GAMs as building a bar-chart lookup table.
+                Console.WriteLine($"Feature{i}");
+                for (int j = 0; j < binUpperBounds.Count; j++)
+                    Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+            }
+
+            // Expected output:
+            //  Average prediction: 1.33
+            //
+            //  Feature0
+            //  x < -0.44 => 0.128
+            //  x < -0.38 => 0.066
+            //  x < -0.32 => 0.040
+            //  x < -0.26 => -0.006
+            //  x < -0.20 => -0.035
+            //  x < -0.13 => -0.050
+            //  x < 0.06 => -0.077
+            //  x < 0.12 => -0.075
+            //  x < 0.18 => -0.052
+            //  x < 0.25 => -0.031
+            //  x < 0.31 => -0.002
+            //  x < 0.37 => 0.040
+            //  x < 0.44 => 0.083
+            //  x < ∞ => 0.123
+
+            //  Feature1
+            //  x < 0.00 => -0.245
+            //  x < 0.06 => 0.671
+            //  x < 0.24 => 0.723
+            //  x < 0.31 => -0.141
+            //  x < 0.37 => -0.241
+            //  x < ∞ => -0.248
 
             // Let's consider this output. To score a given example, we look up the first bin where the inequality
             // is satisfied for the feature value. We can look at the whole function to get a sense for how the
-            // model responds to the variable on a global level. For the student-teacher-ratio variable, we can see
-            // that smaller class sizes are predictive of a higher house value, while student-teacher ratios higher 
-            // than about 18 lead to lower predictions in house value. This makes intuitive sense, as smaller class 
-            // sizes are desirable and also indicative of better-funded schools, which could make buyers likely to
-            // pay more for the house.
-            
-            // Another thing to notice is that these feature functions can be noisy. See student-teacher ratios > 21.05.
-            // Common practice is to use resampling methods to estimate a confidence interval at each bin. This will
-            // help to determine if the effect is real or just sampling noise. See for example 
+            // model responds to the variable on a global level.
+            // The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average
+            // expected output over the training set. Very few bins are used to model the second feature because the GAM model
+            // discards unchanged bins to create smaller models.
+            // One last thing to notice is that these feature functions can be noisy. While we know that Feature0 should be
+            // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use 
+            // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is 
+            // real or just sampling noise. See for example:
             // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model 
             // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>."
         }
+
+        private class Data
+        {
+            public float Label { get; set; }
+            [VectorType(2)]
+            public float[] Features { get; set; }
+        }
+
+        /// <summary>
+        /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature0 contributes a parabola centered around 0,
+        /// while Feature1 contributes a simple piecewise (step) function.
+        /// </summary>
+        /// <param name="numExamples">The number of examples to generate.</param>
+        /// <param name="seed">The seed for the random number generator used to produce data.</param>
+        /// <returns>A lazily evaluated sequence of <c>numExamples</c> generated examples.</returns>
+        private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
+        {
+            float bias = 1.0f;
+            var rng = new Random(seed);
+            for (int i = 0; i < numExamples; i++)
+            {
+                // Generate random, uncoupled features.
+                var data = new Data {
+                    Features = new float[2] { (float)(rng.NextDouble() - 0.5), (float)(rng.NextDouble() - 0.5) }
+                };
+                // Compute the label from the shape functions and add noise.
+                data.Label = bias + Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + (float)(rng.NextDouble() - 0.5);
+
+                yield return data;
+            }
+        }
+
+        private static float Parabola(float x) => x * x;
+
+        private static float SimplePiecewise(float x)
+        {
+            if (x < 0)
+                return 0;
+            else if (x < 0.25)
+                return 1;
+            else
+                return 0;
+        }
     }
 }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
index 33617b2d94..45e69e3562 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
@@ -1,6 +1,5 @@
 ﻿using System;
 using System.Collections.Generic;
-using System.Linq;
 using Microsoft.ML;
 using Microsoft.ML.Data;
 using Microsoft.ML.Trainers.FastTree;
@@ -15,92 +14,143 @@ public static void Example()
         {
             // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
             // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this example to make outputs deterministic.
-            var mlContext = new MLContext(seed: 0);
+            var mlContext = new MLContext();
 
-            // Create a list of training examples.
-            var examples = GenerateRandomDataPoints(1000);
+            // Create training and validation datasets.
+            var samples = GenerateData();
 
-            // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
-            var trainingData = mlContext.Data.LoadFromEnumerable(examples);
+            // Convert the dataset to an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
 
-            // Define trainer options.
-            var options = new GamRegressionTrainer.Options
-            {
-                // The entropy (regularization) coefficient.
-                EntropyCoefficient = 0.3,
-                // Reduce the number of iterations to 50.
-                NumberOfIterations = 50
-            };
-
-            // Define the trainer.
-            var pipeline = mlContext.Regression.Trainers.Gam(options);
+            // Split the data into a training set and a validation set.
+            var dataSets = mlContext.Data.TrainTestSplit(data);
+            var trainSet = dataSets.TrainSet;
+            var validSet = dataSets.TestSet;
 
-            // Train the model.
-            var model = pipeline.Fit(trainingData);
+            // Create a GAM trainer.
+            // Use a small number of bins for this example.
+            var trainer = mlContext.Regression.Trainers.Gam(
+                new GamRegressionTrainer.Options { MaximumBinCountPerFeature = 16 });
 
-            // Create testing examples. Use different random seed to make it different from training data.
-            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+            // Fit the model to the data using a validation set.
+            // GAM will use a technique called validation pruning to tune the model after training
+            // to improve generalization.
+            var model = trainer.Fit(trainSet, validSet);
 
-            // Run the model on test data set.
-            var transformedTestData = model.Transform(testData);
+            // Extract the model parameters.
+            var gam = model.Model;
 
-            // Convert IDataView object to a list.
-            var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
-
-            // Look at 5 predictions
-            foreach (var p in predictions.Take(5))
-                Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}");
+            // Now investigate the bias and shape functions of the GAM model.
+            // The bias represents the average prediction for the training data.
+            Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
             // Expected output:
-            //   Label: 0.985, Prediction: 0.841
-            //   Label: 0.155, Prediction: 0.187
-            //   Label: 0.515, Prediction: 0.496
-            //   Label: 0.566, Prediction: 0.467
-            //   Label: 0.096, Prediction: 0.144
+            //   Average prediction: 1.33
 
-            // Evaluate the overall metrics
-            var metrics = mlContext.Regression.Evaluate(transformedTestData);
-            Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+            // Let's take a look at the features that the model built. Similar to a linear model, we have
+            // one response per feature. Unlike a linear model, this response is a function instead of a line.
+            // Each feature response represents the deviation from the average prediction as a function of the 
+            // feature value.
+            for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
+            {
+                // Break a line.
+                Console.WriteLine();
+
+                // Get the bin upper bounds for the feature.
+                var binUpperBounds = gam.GetBinUpperBounds(i);
+                // Get the bin effects; these are the effect size for each bin.
+                var binEffects = gam.GetBinEffects(i);
+
+                // Now, write the function to the console. The function is a set of bins, and the corresponding
+                // function values. You can think of GAMs as building a bar-chart lookup table.
+                Console.WriteLine($"Feature{i}");
+                for (int j = 0; j < binUpperBounds.Count; j++)
+                    Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+            }
 
             // Expected output:
-            //   Mean Absolute Error: 0.06
-            //   Mean Squared Error: 0.01
-            //   Root Mean Squared Error: 0.08
-            //   RSquared: 0.93
+            //  Average prediction: 1.33
+            //
+            //  Feature0
+            //  x < -0.44 => 0.128
+            //  x < -0.38 => 0.066
+            //  x < -0.32 => 0.040
+            //  x < -0.26 => -0.006
+            //  x < -0.20 => -0.035
+            //  x < -0.13 => -0.050
+            //  x < 0.06 => -0.077
+            //  x < 0.12 => -0.075
+            //  x < 0.18 => -0.052
+            //  x < 0.25 => -0.031
+            //  x < 0.31 => -0.002
+            //  x < 0.37 => 0.040
+            //  x < 0.44 => 0.083
+            //  x < ∞ => 0.123
+
+            //  Feature1
+            //  x < 0.00 => -0.245
+            //  x < 0.06 => 0.671
+            //  x < 0.24 => 0.723
+            //  x < 0.31 => -0.141
+            //  x < 0.37 => -0.241
+            //  x < ∞ => -0.248
+
+            // Let's consider this output. To score a given example, we look up the first bin where the inequality
+            // is satisfied for the feature value. We can look at the whole function to get a sense for how the
+            // model responds to the variable on a global level.
+            // The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average
+            // expected output over the training set. Very few bins are used to model the second feature because the GAM model
+            // discards unchanged bins to create smaller models.
+            // One last thing to notice is that these feature functions can be noisy. While we know that Feature0 should be
+            // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use 
+            // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is 
+            // real or just sampling noise. See for example:
+            // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model 
+            // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>."
+        }
+
+        private class Data
+        {
+            public float Label { get; set; }
+            [VectorType(2)]
+            public float[] Features { get; set; }
         }
 
-        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0)
+        /// <summary>
+        /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature0 contributes a parabola centered around 0,
+        /// while Feature1 contributes a simple piecewise (step) function.
+        /// </summary>
+        /// <param name="numExamples">The number of examples to generate.</param>
+        /// <param name="seed">The seed for the random number generator used to produce data.</param>
+        /// <returns>A lazily evaluated sequence of <c>numExamples</c> generated examples.</returns>
+        private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
         {
-            var random = new Random(seed);
-            float randomFloat() => (float)random.NextDouble();
-            for (int i = 0; i < count; i++)
+            float bias = 1.0f;
+            var rng = new Random(seed);
+            for (int i = 0; i < numExamples; i++)
             {
-                var label = randomFloat();
-                yield return new DataPoint
+                // Generate random, uncoupled features.
+                var data = new Data
                 {
-                    Label = label,
-                    // Create random features that are correlated with label.
-                    Features = Enumerable.Repeat(label, 50).Select(x => x + randomFloat()).ToArray()
+                    Features = new float[2] { (float)(rng.NextDouble() - 0.5), (float)(rng.NextDouble() - 0.5) }
                 };
+                // Compute the label from the shape functions and add noise.
+                data.Label = bias + Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + (float)(rng.NextDouble() - 0.5);
+
+                yield return data;
             }
         }
 
-        // Example with label and 50 feature values. A data set is a collection of such examples.
-        private class DataPoint
-        {
-            public float Label { get; set; }
-            [VectorType(50)]
-            public float[] Features { get; set; }
-        }
+        private static float Parabola(float x) => x * x;
 
-        // Class used to capture predictions.
-        private class Prediction
+        private static float SimplePiecewise(float x)
         {
-            // Original label.
-            public float Label { get; set; }
-            // Predicted score from the trainer.
-            public float Score { get; set; }
+            if (x < 0)
+                return 0;
+            else if (x < 0.25)
+                return 1;
+            else
+                return 0;
         }
     }
-}
\ No newline at end of file
+}

From 7d335d05fb766104a20b6a43309698d76ac8116b Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Wed, 10 Apr 2019 15:52:40 -0700
Subject: [PATCH 2/8] Adding GAM Binary Classification samples.

---
 .../Trainers/BinaryClassification/Gam.cs      | 147 +++++++++++++++++
 .../BinaryClassification/GamWithOptions.cs    | 150 ++++++++++++++++++
 .../Dynamic/Trainers/Regression/Gam.cs        |   6 +-
 .../Trainers/Regression/GamWithOptions.cs     |   8 +-
 .../TreeTrainersCatalog.cs                    |  18 ++-
 5 files changed, 318 insertions(+), 11 deletions(-)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
new file mode 100644
index 0000000000..90af558c0b
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -0,0 +1,147 @@
+﻿using System;
+using System.Collections.Generic;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+
+namespace Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class Gam
+    {
+        // This example requires installation of additional NuGet package
+        // <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext();
+            
+            // Create training and validation datasets.
+            var samples = GenerateData();
+
+            // Convert the dataset to an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Split the data into a training set and a validation set.
+            var dataSets = mlContext.Data.TrainTestSplit(data);
+            var trainSet = dataSets.TrainSet;
+            var validSet = dataSets.TestSet;
+
+            // Create a GAM trainer.
+            // Use a small number of bins for this example.
+            var trainer = mlContext.BinaryClassification.Trainers.Gam(maximumBinCountPerFeature: 16);
+
+            // Fit the model to the data using a validation set.
+            // GAM will use a technique called validation pruning to tune the model after training
+            // to improve generalization.
+            var model = trainer.Fit(trainSet, validSet);
+            
+            // Extract the model parameters.
+            var gam = model.Model.SubModel;
+
+            // Now investigate the bias and shape functions of the GAM model.
+            // The bias represents the average prediction for the training data.
+            Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
+
+            // Let's take a look at the features that the model built. Similar to a linear model, we have
+            // one response per feature. Unlike a linear model, this response is a function instead of a line.
+            // Each feature response represents the deviation from the average prediction as a function of the 
+            // feature value.
+            for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
+            {
+                // Break a line.
+                Console.WriteLine();
+
+                // Get the bin upper bounds for the feature.
+                var binUpperBounds = gam.GetBinUpperBounds(i);
+                // Get the bin effects; these are the effect size for each bin.
+                var binEffects = gam.GetBinEffects(i);
+
+                // Now, write the function to the console. The function is a set of bins, and the corresponding
+                // function values. You can think of GAMs as building a bar-chart lookup table.
+                Console.WriteLine($"Feature{i}");
+                for (int j = 0; j < binUpperBounds.Count; j++)
+                    Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+            }
+
+            // Expected output:
+            //  Average prediction: 0.82
+            //
+            //  Feature0
+            //  x < -0.44 => 0.286
+            //  x < -0.38 => 0.225
+            //  x < -0.32 => 0.048
+            //  x < -0.26 => -0.110
+            //  x < -0.20 => -0.116
+            //  x < 0.18 => -0.143
+            //  x < 0.25 => -0.115
+            //  x < 0.31 => -0.005
+            //  x < 0.37 => 0.097
+            //  x < 0.44 => 0.263
+            //  x < ∞ => 0.284
+            //
+            //  Feature1
+            //  x < 0.00 => -0.350
+            //  x < 0.24 => 0.875
+            //  x < 0.31 => -0.138
+            //  x < ∞ => -0.188
+
+            // Let's consider this output. To score a given example, we look up the first bin where the inequality
+            // is satisfied for the feature value. We can look at the whole function to get a sense for how the
+            // model responds to the variable on a global level.
+            // The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average
+            // expected output over the training set. Very few bins are used to model the second feature because the GAM model
+            // discards unchanged bins to create smaller models.
+            // One last thing to notice is that these feature functions can be noisy. While we know that Feature0 should be
+            // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use 
+            // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is 
+            // real or just sampling noise. See for example:
+            // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model 
+            // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>."
+        }
+
+        private class Data
+        {
+            public bool Label { get; set; }
+
+            [VectorType(2)]
+            public float[] Features { get; set; }
+        }
+
+        /// <summary>
+        /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature0 contributes a parabola centered around 0,
+        /// while Feature1 contributes a simple piecewise (step) function.
+        /// </summary>
+        /// <param name="numExamples">The number of examples to generate.</param>
+        /// <param name="seed">The seed for the random number generator used to produce data.</param>
+        /// <returns>A lazily evaluated sequence of <c>numExamples</c> generated examples.</returns>
+        private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
+        {
+            var rng = new Random(seed);
+            for (int i = 0; i < numExamples; i++)
+            {
+                // Generate random, uncoupled features.
+                var data = new Data {
+                    Features = new float[2] { (float)(rng.NextDouble() - 0.5), (float)(rng.NextDouble() - 0.5) }
+                };
+                // Compute the label from the shape functions and add noise.
+                data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + (float)(rng.NextDouble() - 0.5)) > 0.5;
+
+                yield return data;
+            }
+        }
+
+        private static float Parabola(float x) => x * x;
+
+        private static float SimplePiecewise(float x)
+        {
+            if (x < 0)
+                return 0;
+            else if (x < 0.25)
+                return 1;
+            else
+                return 0;
+        }
+
+        private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-1 * x));
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
new file mode 100644
index 0000000000..792ccffd9b
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
@@ -0,0 +1,150 @@
+﻿using System;
+using System.Collections.Generic;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using Microsoft.ML.Trainers.FastTree;
+
+namespace Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class GamWithOptions
+    {
+        // This example requires installation of additional NuGet package
+        // <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create training and validation datasets.
+            var samples = GenerateData();
+
+            // Convert the dataset to an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Create train and set set.
+            var dataSets = mlContext.Data.TrainTestSplit(data);
+            var trainSet = dataSets.TrainSet;
+            var validSet = dataSets.TestSet;
+
+            // Create a GAM trainer.
+            // Use a small number of bins for this example.
+            var trainer = mlContext.BinaryClassification.Trainers.Gam(
+                new GamBinaryTrainer.Options { MaximumBinCountPerFeature = 16 });
+
+            // Fit the model to the data using a validation set.
+            // GAM will use a technique called validation pruning to tune the model after training
+            // to improve generalization.
+            var model = trainer.Fit(trainSet, validSet);
+
+            // Extract the model parameters.
+            var gam = model.Model.SubModel;
+
+            // Now investigate the bias and shape functions of the GAM model.
+            // The bias represents the average prediction for the training data.
+            Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
+
+            // Let's take a look at the features that the model built. Similar to a linear model, we have
+            // one response per feature. Unlike a linear model, this response is a function instead of a line.
+            // Each feature response represents the deviation from the average prediction as a function of the 
+            // feature value.
+            for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
+            {
+                // Break a line.
+                Console.WriteLine();
+
+                // Get the bin upper bounds for the feature.
+                var binUpperBounds = gam.GetBinUpperBounds(i);
+                // Get the bin effects; these are the effect size for each bin.
+                var binEffects = gam.GetBinEffects(i);
+
+                // Now, write the function to the console. The function is a set of bins, and the corresponding
+                // function values. You can think of GAMs as building a bar-chart lookup table.
+                Console.WriteLine($"Feature{i}");
+                for (int j = 0; j < binUpperBounds.Count; j++)
+                    Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+            }
+
+            // Expected output:
+            //  Average prediction: 0.82
+            //
+            //  Feature0
+            //  x < -0.44 => 0.286
+            //  x < -0.38 => 0.225
+            //  x < -0.32 => 0.048
+            //  x < -0.26 => -0.110
+            //  x < -0.20 => -0.116
+            //  x < 0.18 => -0.143
+            //  x < 0.25 => -0.115
+            //  x < 0.31 => -0.005
+            //  x < 0.37 => 0.097
+            //  x < 0.44 => 0.263
+            //  x < ∞ => 0.284
+            //
+            //  Feature1
+            //  x < 0.00 => -0.350
+            //  x < 0.24 => 0.875
+            //  x < 0.31 => -0.138
+            //  x < ∞ => -0.188
+
+            // Let's consider this output. To score a given example, we look up the first bin where the inequality
+            // is satisfied for the feature value. We can look at the whole function to get a sense for how the
+            // model responds to the variable on a global level.
+            // The model can be seen to reconstruct the parabolic and step-wise functions, shifted with respect to the average
+            // expected output over the training set. Very few bins are used to model the second feature because the GAM model
+            // discards unchanged bins to create smaller models.
+            // One last thing to notice is that these feature functions can be noisy. While we know that Feature0 (the parabola) should be
+            // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use 
+            // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is 
+            // real or just sampling noise. See for example:
+            // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model 
+            // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>.
+        }
+
+        private class Data
+        {
+            public bool Label { get; set; }
+
+            [VectorType(2)]
+            public float[] Features { get; set; }
+        }
+
+        /// <summary>
+        /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. The label depends on Features[0] through a
+        /// parabola centered around 0 and on Features[1] through a simple piecewise function, plus noise.
+        /// </summary>
+        /// <param name="numExamples">The number of examples to generate.</param>
+        /// <param name="seed">The seed for the random number generator used to produce data.</param>
+        /// <returns>A lazily generated sequence of <paramref name="numExamples"/> labeled examples.</returns>
+        private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
+        {
+            var rng = new Random(seed);
+            for (int i = 0; i < numExamples; i++)
+            {
+                // Generate random, uncoupled features.
+                var data = new Data
+                {
+                    Features = new float[2] { (float)(rng.NextDouble() - 0.5), (float)(rng.NextDouble() - 0.5) }
+                };
+                // Compute the label from the shape functions and add noise.
+                data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + (float)(rng.NextDouble() - 0.5)) > 0.5;
+
+                yield return data;
+            }
+        }
+
+        private static float Parabola(float x) => x * x;
+
+        private static float SimplePiecewise(float x)
+        {
+            if (x < 0)
+                return 0;
+            else if (x < 0.25)
+                return 1;
+            else
+                return 0;
+        }
+
+        private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-1 * x));
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
index be545c86dc..ec28053f24 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
@@ -42,9 +42,6 @@ public static void Example()
             // The bias represents the average prediction for the training data.
             Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
-            // Expected output:
-            //   Average predicted cost: 22.53
-
             // Let's take a look at the features that the model built. Similar to a linear model, we have
             // one response per feature. Unlike a linear model, this response is a function instead of a line.
             // Each feature response represents the deviation from the average prediction as a function of the 
@@ -84,7 +81,7 @@ public static void Example()
             //  x < 0.37 => 0.040
             //  x < 0.44 => 0.083
             //  x < ∞ => 0.123
-
+            //
             //  Feature1
             //  x < 0.00 => -0.245
             //  x < 0.06 => 0.671
@@ -110,6 +107,7 @@ public static void Example()
         private class Data
         {
             public float Label { get; set; }
+
             [VectorType(2)]
             public float[] Features { get; set; }
         }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
index 45e69e3562..c03621bbba 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
@@ -44,9 +44,6 @@ public static void Example()
             // The bias represents the average prediction for the training data.
             Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
-            // Expected output:
-            //   Average predicted cost: 22.53
-
             // Let's take a look at the features that the model built. Similar to a linear model, we have
             // one response per feature. Unlike a linear model, this response is a function instead of a line.
             // Each feature response represents the deviation from the average prediction as a function of the 
@@ -86,7 +83,7 @@ public static void Example()
             //  x < 0.37 => 0.040
             //  x < 0.44 => 0.083
             //  x < ∞ => 0.123
-
+            //
             //  Feature1
             //  x < 0.00 => -0.245
             //  x < 0.06 => 0.671
@@ -94,7 +91,7 @@ public static void Example()
             //  x < 0.31 => -0.141
             //  x < 0.37 => -0.241
             //  x < ∞ => -0.248
-
+            
             // Let's consider this output. To score a given example, we look up the first bin where the inequality
             // is satisfied for the feature value. We can look at the whole function to get a sense for how the
             // model responds to the variable on a global level.
@@ -112,6 +109,7 @@ public static void Example()
         private class Data
         {
             public float Label { get; set; }
+
             [VectorType(2)]
             public float[] Features { get; set; }
         }
diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
index 02ab57a7fa..7031a673da 100644
--- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
+++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
@@ -173,6 +173,13 @@ public static FastTreeRankingTrainer FastTree(this RankingCatalog.RankingTrainer
         /// <param name="numberOfIterations">The number of iterations to use in learning the features.</param>
         /// <param name="maximumBinCountPerFeature">The maximum number of bins to use to approximate features.</param>
         /// <param name="learningRate">The learning rate. GAMs work best with a small learning rate.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[Gam](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static GamBinaryTrainer Gam(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             string labelColumnName = DefaultColumnNames.Label,
             string featureColumnName = DefaultColumnNames.Features,
@@ -191,6 +198,13 @@ public static GamBinaryTrainer Gam(this BinaryClassificationCatalog.BinaryClassi
         /// </summary>
         /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
         /// <param name="options">Trainer options.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[Gam](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static GamBinaryTrainer Gam(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             GamBinaryTrainer.Options options)
         {
@@ -212,7 +226,7 @@ public static GamBinaryTrainer Gam(this BinaryClassificationCatalog.BinaryClassi
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[GamRegression](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs)]
+        /// [!code-csharp[Gam](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs)]
         /// ]]>
         /// </format>
         /// </example>
@@ -237,7 +251,7 @@ public static GamRegressionTrainer Gam(this RegressionCatalog.RegressionTrainers
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[GamRegression](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs)]
+        /// [!code-csharp[Gam](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs)]
         /// ]]>
         /// </format>
         /// </example>

From 73d8270f3b3ef63296a4a6d56fc21f5908cb8e22 Mon Sep 17 00:00:00 2001
From: Wei-Sheng Chin <wschin@outlook.com>
Date: Thu, 11 Apr 2019 10:46:21 -0700
Subject: [PATCH 3/8] Update
 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs

Co-Authored-By: rogancarr <rogan.carr@hotmail.com>
---
 .../Dynamic/Trainers/BinaryClassification/Gam.cs                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
index 90af558c0b..aee31e62b7 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -27,7 +27,7 @@ public static void Example()
             var validSet = dataSets.TestSet;
 
             // Create a GAM trainer.
-            // Use a small number of bins for this example.
+            // Use a small number of bins for this example. The setting below means for each feature, we divide its range into 16 independent discrete regions. For example, if a feature `Age`'s origin range is from 0 to 255. The first region might be 0-15 and the second region 16-31.
             var trainer = mlContext.BinaryClassification.Trainers.Gam(maximumBinCountPerFeature: 16);
 
             // Fit the model to the data using a validation set.

From 4469a8562534ab1d68fb7e4075d7f571c42b4607 Mon Sep 17 00:00:00 2001
From: Wei-Sheng Chin <wschin@outlook.com>
Date: Thu, 11 Apr 2019 10:46:31 -0700
Subject: [PATCH 4/8] Update
 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs

Co-Authored-By: rogancarr <rogan.carr@hotmail.com>
---
 .../Dynamic/Trainers/BinaryClassification/Gam.cs                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
index aee31e62b7..ddd9e74cbd 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -21,7 +21,7 @@ public static void Example()
             // Convert the dataset to an IDataView.
             var data = mlContext.Data.LoadFromEnumerable(samples);
 
-            // Create train and set set.
+            // Create training and validation sets.
             var dataSets = mlContext.Data.TrainTestSplit(data);
             var trainSet = dataSets.TrainSet;
             var validSet = dataSets.TestSet;

From 091fafd1a1c968588c00ae9ca16e8dd6ff0b670d Mon Sep 17 00:00:00 2001
From: Wei-Sheng Chin <wschin@outlook.com>
Date: Thu, 11 Apr 2019 10:46:47 -0700
Subject: [PATCH 5/8] Update
 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs

Co-Authored-By: rogancarr <rogan.carr@hotmail.com>
---
 .../Dynamic/Trainers/BinaryClassification/Gam.cs                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
index ddd9e74cbd..463af1f93c 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -43,7 +43,7 @@ public static void Example()
             Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
             // Let's take a look at the features that the model built. Similar to a linear model, we have
-            // one response per feature. Unlike a linear model, this response is a function instead of a line.
+            // one response per feature. Unlike a linear model, this response is a step function (https://en.wikipedia.org/wiki/Step_function) instead of a line.
             // Each feature response represents the deviation from the average prediction as a function of the 
             // feature value.
             for (int i = 0; i < gam.NumberOfShapeFunctions; i++)

From 7f7c991cf3172e4b55de12bb8632cdfb9fe476f1 Mon Sep 17 00:00:00 2001
From: Wei-Sheng Chin <wschin@outlook.com>
Date: Thu, 11 Apr 2019 10:46:57 -0700
Subject: [PATCH 6/8] Update
 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs

Co-Authored-By: rogancarr <rogan.carr@hotmail.com>
---
 .../Dynamic/Trainers/BinaryClassification/Gam.cs                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
index 463af1f93c..8b29087de0 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -30,7 +30,7 @@ public static void Example()
             // Use a small number of bins for this example. The setting below means for each feature, we divide its range into 16 independent discrete regions. For example, if a feature `Age`'s origin range is from 0 to 255. The first region might be 0-15 and the second region 16-31.
             var trainer = mlContext.BinaryClassification.Trainers.Gam(maximumBinCountPerFeature: 16);
 
-            // Fit the model to the data using a validation set.
+            // Fit the model using both of training and validation sets.
             // GAM will use a technique called validation pruning to tune the model after training
             // to improve generalization.
             var model = trainer.Fit(trainSet, validSet);

From 1c992772d3da30ab64b0a420738291f47cf36b52 Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Thu, 11 Apr 2019 11:11:52 -0700
Subject: [PATCH 7/8] Addressing PR comments; rewriting comments.

---
 .../Trainers/BinaryClassification/Gam.cs      | 32 ++++++++++-------
 .../BinaryClassification/GamWithOptions.cs    | 34 ++++++++++--------
 .../Dynamic/Trainers/Regression/Gam.cs        | 36 +++++++++++--------
 .../Trainers/Regression/GamWithOptions.cs     | 26 ++++++++------
 4 files changed, 76 insertions(+), 52 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
index 8b29087de0..a9f54f7f65 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -15,7 +15,7 @@ public static void Example()
             // as a catalog of available operations and as the source of randomness.
             var mlContext = new MLContext();
             
-            // Create training and validation datasets.
+            // Create the dataset.
             var samples = GenerateData();
 
             // Convert the dataset to an IDataView.
@@ -27,25 +27,30 @@ public static void Example()
             var validSet = dataSets.TestSet;
 
             // Create a GAM trainer.
-            // Use a small number of bins for this example. The setting below means for each feature, we divide its range into 16 independent discrete regions. For example, if a feature `Age`'s origin range is from 0 to 255. The first region might be 0-15 and the second region 16-31.
+            // Use a small number of bins for this example. The setting below means for each feature, 
+            // we divide its range into 16 discrete regions for the training process. Note that these
+            // regions are not evenly spaced, and that the final model may contain fewer bins, as
+            // neighboring bins with identical values will be combined. In general, we recommend using
+            // at least the default number of bins, as a small number of bins limits the capacity of
+            // the model.
             var trainer = mlContext.BinaryClassification.Trainers.Gam(maximumBinCountPerFeature: 16);
 
-            // Fit the model using both of training and validation sets.
-            // GAM will use a technique called validation pruning to tune the model after training
-            // to improve generalization.
+            // Fit the model using both the training and validation sets. GAM can use a technique called 
+            // pruning to tune the model to the validation set after training to improve generalization.
             var model = trainer.Fit(trainSet, validSet);
             
             // Extract the model parameters.
             var gam = model.Model.SubModel;
 
-            // Now investigate the bias and shape functions of the GAM model.
-            // The bias represents the average prediction for the training data.
+            // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
+            // and potentially learn about our dataset.
+            // First, we will look at the bias; the bias represents the average prediction for the training data.
             Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
-            // Let's take a look at the features that the model built. Similar to a linear model, we have
-            // one response per feature. Unlike a linear model, this response is a step function (https://en.wikipedia.org/wiki/Step_function) instead of a line.
-            // Each feature response represents the deviation from the average prediction as a function of the 
-            // feature value.
+            // Now look at the shape functions that the model has learned. Similar to a linear model, we have
+            // one response per feature, and they are independent. Unlike a linear model, this response is a 
+            // generic function instead of a line. Because we have included a bias term, each feature response 
+            // represents the deviation from the average prediction as a function of the feature value.
             for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
             {
                 // Break a line.
@@ -53,11 +58,12 @@ public static void Example()
 
                 // Get the bin upper bounds for the feature.
                 var binUpperBounds = gam.GetBinUpperBounds(i);
-                // Get the bin effects; these are the effect size for each bin.
+
+                // Get the bin effects; these are the function values for each bin.
                 var binEffects = gam.GetBinEffects(i);
 
                 // Now, write the function to the console. The function is a set of bins, and the corresponding
-                // function values. You can think of GAMs as building a bar-chart lookup table.
+                // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
                 Console.WriteLine($"Feature{i}");
                 for (int j = 0; j < binUpperBounds.Count; j++)
                     Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
index 792ccffd9b..40697a2322 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
@@ -16,38 +16,43 @@ public static void Example()
             // as a catalog of available operations and as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create training and validation datasets.
+            // Create the dataset.
             var samples = GenerateData();
 
             // Convert the dataset to an IDataView.
             var data = mlContext.Data.LoadFromEnumerable(samples);
 
-            // Create train and set set.
+            // Create training and validation datasets.
             var dataSets = mlContext.Data.TrainTestSplit(data);
             var trainSet = dataSets.TrainSet;
             var validSet = dataSets.TestSet;
 
             // Create a GAM trainer.
-            // Use a small number of bins for this example.
+            // Use a small number of bins for this example. The setting below means for each feature, 
+            // we divide its range into 16 discrete regions for the training process. Note that these
+            // regions are not evenly spaced, and that the final model may contain fewer bins, as
+            // neighboring bins with identical values will be combined. In general, we recommend using
+            // at least the default number of bins, as a small number of bins limits the capacity of
+            // the model.
             var trainer = mlContext.BinaryClassification.Trainers.Gam(
                 new GamBinaryTrainer.Options { MaximumBinCountPerFeature = 16 });
 
-            // Fit the model to the data using a validation set.
-            // GAM will use a technique called validation pruning to tune the model after training
-            // to improve generalization.
+            // Fit the model using both the training and validation sets. GAM can use a technique called 
+            // pruning to tune the model to the validation set after training to improve generalization.
             var model = trainer.Fit(trainSet, validSet);
 
             // Extract the model parameters.
             var gam = model.Model.SubModel;
 
-            // Now investigate the bias and shape functions of the GAM model.
-            // The bias represents the average prediction for the training data.
+            // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
+            // and potentially learn about our dataset.
+            // First, we will look at the bias; the bias represents the average prediction for the training data.
             Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
-            // Let's take a look at the features that the model built. Similar to a linear model, we have
-            // one response per feature. Unlike a linear model, this response is a function instead of a line.
-            // Each feature response represents the deviation from the average prediction as a function of the 
-            // feature value.
+            // Now look at the shape functions that the model has learned. Similar to a linear model, we have
+            // one response per feature, and they are independent. Unlike a linear model, this response is a 
+            // generic function instead of a line. Because we have included a bias term, each feature response 
+            // represents the deviation from the average prediction as a function of the feature value.
             for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
             {
                 // Break a line.
@@ -55,11 +60,12 @@ public static void Example()
 
                 // Get the bin upper bounds for the feature.
                 var binUpperBounds = gam.GetBinUpperBounds(i);
-                // Get the bin effects; these are the effect size for each bin.
+
+                // Get the bin effects; these are the function values for each bin.
                 var binEffects = gam.GetBinEffects(i);
 
                 // Now, write the function to the console. The function is a set of bins, and the corresponding
-                // function values. You can think of GAMs as building a bar-chart lookup table.
+                // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
                 Console.WriteLine($"Feature{i}");
                 for (int j = 0; j < binUpperBounds.Count; j++)
                     Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
index ec28053f24..ac4aebff2d 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
@@ -14,38 +14,43 @@ public static void Example()
             // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
             // as a catalog of available operations and as the source of randomness.
             var mlContext = new MLContext();
-            
-            // Create training and validation datasets.
+
+            // Create the dataset.
             var samples = GenerateData();
 
             // Convert the dataset to an IDataView.
             var data = mlContext.Data.LoadFromEnumerable(samples);
 
-            // Create train and set set.
+            // Create training and validation sets.
             var dataSets = mlContext.Data.TrainTestSplit(data);
             var trainSet = dataSets.TrainSet;
             var validSet = dataSets.TestSet;
 
             // Create a GAM trainer.
-            // Use a small number of bins for this example.
+            // Use a small number of bins for this example. The setting below means for each feature, 
+            // we divide its range into 16 discrete regions for the training process. Note that these
+            // regions are not evenly spaced, and that the final model may contain fewer bins, as
+            // neighboring bins with identical values will be combined. In general, we recommend using
+            // at least the default number of bins, as a small number of bins limits the capacity of
+            // the model.
             var trainer = mlContext.Regression.Trainers.Gam(maximumBinCountPerFeature: 16);
 
-            // Fit the model to the data using a validation set.
-            // GAM will use a technique called validation pruning to tune the model after training
-            // to improve generalization.
+            // Fit the model using both the training and validation sets. GAM can use a technique called 
+            // pruning to tune the model to the validation set after training to improve generalization.
             var model = trainer.Fit(trainSet, validSet);
             
             // Extract the model parameters.
             var gam = model.Model;
 
-            // Now investigate the bias and shape functions of the GAM model.
-            // The bias represents the average prediction for the training data.
+            // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
+            // and potentially learn about our dataset.
+            // First, we will look at the bias; the bias represents the average prediction for the training data.
             Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
-            // Let's take a look at the features that the model built. Similar to a linear model, we have
-            // one response per feature. Unlike a linear model, this response is a function instead of a line.
-            // Each feature response represents the deviation from the average prediction as a function of the 
-            // feature value.
+            // Now look at the shape functions that the model has learned. Similar to a linear model, we have
+            // one response per feature, and they are independent. Unlike a linear model, this response is a 
+            // generic function instead of a line. Because we have included a bias term, each feature response 
+            // represents the deviation from the average prediction as a function of the feature value.
             for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
             {
                 // Break a line.
@@ -53,11 +58,12 @@ public static void Example()
 
                 // Get the bin upper bounds for the feature.
                 var binUpperBounds = gam.GetBinUpperBounds(i);
-                // Get the bin effects; these are the effect size for each bin.
+
+                // Get the bin effects; these are the function values for each bin.
                 var binEffects = gam.GetBinEffects(i);
 
                 // Now, write the function to the console. The function is a set of bins, and the corresponding
-                // function values. You can think of GAMs as building a bar-chart lookup table.
+                // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
                 Console.WriteLine($"Feature{i}");
                 for (int j = 0; j < binUpperBounds.Count; j++)
                     Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
index c03621bbba..d59cae4db0 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
@@ -16,32 +16,37 @@ public static void Example()
             // as a catalog of available operations and as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create training and validation datasets.
+            // Create the dataset.
             var samples = GenerateData();
 
             // Convert the dataset to an IDataView.
             var data = mlContext.Data.LoadFromEnumerable(samples);
 
-            // Create train and set set.
+            // Create training and validation sets.
             var dataSets = mlContext.Data.TrainTestSplit(data);
             var trainSet = dataSets.TrainSet;
             var validSet = dataSets.TestSet;
 
             // Create a GAM trainer.
-            // Use a small number of bins for this example.
+            // Use a small number of bins for this example. The setting below means for each feature, 
+            // we divide its range into 16 discrete regions for the training process. Note that these
+            // regions are not evenly spaced, and that the final model may contain fewer bins, as
+            // neighboring bins with identical values will be combined. In general, we recommend using
+            // at least the default number of bins, as a small number of bins limits the capacity of
+            // the model.
             var trainer = mlContext.Regression.Trainers.Gam(
                 new GamRegressionTrainer.Options { MaximumBinCountPerFeature = 16 });
 
-            // Fit the model to the data using a validation set.
-            // GAM will use a technique called validation pruning to tune the model after training
-            // to improve generalization.
+            // Fit the model using both of training and validation sets. GAM can use a technique called 
+            // pruning to tune the model to the validation set after training to improve generalization.
             var model = trainer.Fit(trainSet, validSet);
 
             // Extract the model parameters.
             var gam = model.Model;
 
-            // Now investigate the bias and shape functions of the GAM model.
-            // The bias represents the average prediction for the training data.
+            // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
+            // and potentially learn about our dataset.
+            // First, we will look at the bias; the bias represents the average prediction for the training data.
             Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
 
             // Let's take a look at the features that the model built. Similar to a linear model, we have
@@ -55,11 +60,12 @@ public static void Example()
 
                 // Get the bin upper bounds for the feature.
                 var binUpperBounds = gam.GetBinUpperBounds(i);
-                // Get the bin effects; these are the effect size for each bin.
+
+                // Get the bin effects; these are the function values for each bin.
                 var binEffects = gam.GetBinEffects(i);
 
                 // Now, write the function to the console. The function is a set of bins, and the corresponding
-                // function values. You can think of GAMs as building a bar-chart lookup table.
+                // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
                 Console.WriteLine($"Feature{i}");
                 for (int j = 0; j < binUpperBounds.Count; j++)
                     Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");

From a717ac54600344f05d03612674df46aa77556c2c Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Fri, 12 Apr 2019 14:02:33 -0700
Subject: [PATCH 8/8] Addressing PR comments.

---
 .../Trainers/BinaryClassification/Gam.cs      |  5 ++-
 .../BinaryClassification/GamWithOptions.cs    | 13 ++++--
 .../Dynamic/Trainers/Regression/Gam.cs        |  5 ++-
 .../Trainers/Regression/GamWithOptions.cs     | 41 +++++++++++++++++--
 4 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
index a9f54f7f65..3b9c36644f 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -123,14 +123,15 @@ private class Data
         private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
         {
             var rng = new Random(seed);
+            float centeredFloat() => (float)(rng.NextDouble() - 0.5);
             for (int i = 0; i < numExamples; i++)
             {
                 // Generate random, uncoupled features.
                 var data = new Data {
-                    Features = new float[2] { (float)(rng.NextDouble() - 0.5), (float)(rng.NextDouble() - 0.5) }
+                    Features = new float[2] { centeredFloat(), centeredFloat() }
                 };
                 // Compute the label from the shape functions and add noise.
-                data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + (float)(rng.NextDouble() - 0.5)) > 0.5;
+                data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5;
 
                 yield return data;
             }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
index 40697a2322..e4a408a3ae 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
@@ -34,8 +34,14 @@ public static void Example()
             // neighboring bins with identical values will be combined. In general, we recommend using
             // at least the default number of bins, as a small number of bins limits the capacity of
             // the model.
+            // Also, set the learning rate to half the default to slow down the gradient descent, and
+            // double the number of iterations to compensate.
             var trainer = mlContext.BinaryClassification.Trainers.Gam(
-                new GamBinaryTrainer.Options { MaximumBinCountPerFeature = 16 });
+                new GamBinaryTrainer.Options {
+                    NumberOfIterations = 19000,
+                    MaximumBinCountPerFeature = 16,
+                    LearningRate = 0.001
+                });
 
             // Fit the model using both of training and validation sets. GAM can use a technique called 
             // pruning to tune the model to the validation set after training to improve generalization.
@@ -125,15 +131,16 @@ private class Data
         private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
         {
             var rng = new Random(seed);
+            float centeredFloat() => (float)(rng.NextDouble() - 0.5);
             for (int i = 0; i < numExamples; i++)
             {
                 // Generate random, uncoupled features.
                 var data = new Data
                 {
-                    Features = new float[2] { (float)(rng.NextDouble() - 0.5), (float)(rng.NextDouble() - 0.5) }
+                    Features = new float[2] { centeredFloat(), centeredFloat() }
                 };
                 // Compute the label from the shape functions and add noise.
-                data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + (float)(rng.NextDouble() - 0.5)) > 0.5;
+                data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5;
 
                 yield return data;
             }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
index ac4aebff2d..b070dfbda1 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
@@ -129,14 +129,15 @@ private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed
         {
             float bias = 1.0f;
             var rng = new Random(seed);
+            float centeredFloat() => (float)(rng.NextDouble() - 0.5);
             for (int i = 0; i < numExamples; i++)
             {
                 // Generate random, uncoupled features.
                 var data = new Data {
-                    Features = new float[2] { (float)(rng.NextDouble() - 0.5), (float)(rng.NextDouble() - 0.5) }
+                    Features = new float[2] { centeredFloat(), centeredFloat() }
                 };
                 // Compute the label from the shape functions and add noise.
-                data.Label = bias + Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + (float)(rng.NextDouble() - 0.5);
+                data.Label = bias + Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat();
 
                 yield return data;
             }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
index d59cae4db0..6c973814fd 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
@@ -34,8 +34,12 @@ public static void Example()
             // neighboring bins with identical values will be combined. In general, we recommend using
             // at least the default number of bins, as a small number of bins limits the capacity of
             // the model.
+            // Also, set the pruning metric to 1 so that pruning uses the mean absolute error.
             var trainer = mlContext.Regression.Trainers.Gam(
-                new GamRegressionTrainer.Options { MaximumBinCountPerFeature = 16 });
+                new GamRegressionTrainer.Options {
+                    MaximumBinCountPerFeature = 16,
+                    PruningMetrics = 1
+                });
 
             // Fit the model using both of training and validation sets. GAM can use a technique called 
             // pruning to tune the model to the validation set after training to improve generalization.
@@ -112,6 +116,36 @@ public static void Example()
             // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>."
         }
 
+        // Feature0
+        // x < -0.44 => 0.131
+        // x < -0.38 => 0.067
+        // x < -0.32 => 0.041
+        // x < -0.26 => -0.005
+        // x < -0.20 => -0.035
+        // x < -0.13 => -0.050
+        // x < -0.07 => -0.079
+        // x < -0.01 => -0.083
+        // x < 0.06 => -0.079
+        // x < 0.12 => -0.075
+        // x < 0.18 => -0.052
+        // x < 0.25 => -0.030
+        // x < 0.31 => -0.002
+        // x < 0.37 => 0.041
+        // x < 0.44 => 0.084
+        // x < ∞ => 0.126
+
+        // Feature1
+        // x < -0.37 => -0.255
+        // x < -0.25 => -0.247
+        // x < 0.00 => -0.249
+        // x < 0.06 => 0.671
+        // x < 0.12 => 0.743
+        // x < 0.24 => 0.746
+        // x < 0.31 => -0.143
+        // x < 0.37 => -0.245
+        // x < 0.43 => -0.261
+        // x < ∞ => -0.257
+
         private class Data
         {
             public float Label { get; set; }
@@ -131,15 +165,16 @@ private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed
         {
             float bias = 1.0f;
             var rng = new Random(seed);
+            float centeredFloat() => (float)(rng.NextDouble() - 0.5);
             for (int i = 0; i < numExamples; i++)
             {
                 // Generate random, uncoupled features.
                 var data = new Data
                 {
-                    Features = new float[2] { (float)(rng.NextDouble() - 0.5), (float)(rng.NextDouble() - 0.5) }
+                    Features = new float[2] { centeredFloat(), centeredFloat() }
                 };
                 // Compute the label from the shape functions and add noise.
-                data.Label = bias + Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + (float)(rng.NextDouble() - 0.5);
+                data.Label = bias + Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat();
 
                 yield return data;
             }