diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
new file mode 100644
index 0000000000..3b9c36644f
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -0,0 +1,154 @@
+﻿using System;
+using System.Collections.Generic;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+
+namespace Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class Gam
+    {
+        // This example requires installing the additional NuGet package
+        // <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext();
+            
+            // Create the dataset.
+            var samples = GenerateData();
+
+            // Convert the dataset to an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Split the data; the held-out test partition is used as the validation set below.
+            var dataSets = mlContext.Data.TrainTestSplit(data);
+            var trainSet = dataSets.TrainSet;
+            var validSet = dataSets.TestSet;
+
+            // Create a GAM trainer.
+            // Use a small number of bins for this example. The setting below means for each feature,
+            // we divide its range into 16 discrete regions for the training process. Note that these
+            // regions are not evenly spaced, and that the final model may contain fewer bins, as
+            // neighboring bins with identical values will be combined. In general, we recommend using
+            // at least the default number of bins, as a small number of bins limits the capacity of
+            // the model.
+            var trainer = mlContext.BinaryClassification.Trainers.Gam(maximumBinCountPerFeature: 16);
+
+            // Fit the model using both the training and validation sets. GAM can use a technique called
+            // pruning to tune the model to the validation set after training to improve generalization.
+            var model = trainer.Fit(trainSet, validSet);
+            
+            // Extract the model parameters.
+            var gam = model.Model.SubModel;
+
+            // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
+            // and potentially learn about our dataset.
+            // First, we will look at the bias; the bias represents the average prediction for the training data.
+            Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
+
+            // Now look at the shape functions that the model has learned. Similar to a linear model, we have
+            // one response per feature, and they are independent. Unlike a linear model, this response is a
+            // generic function instead of a line. Because we have included a bias term, each feature response
+            // represents the deviation from the average prediction as a function of the feature value.
+            for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
+            {
+                // Print a blank line to separate the features.
+                Console.WriteLine();
+
+                // Get the bin upper bounds for the feature.
+                var binUpperBounds = gam.GetBinUpperBounds(i);
+
+                // Get the bin effects; these are the function values for each bin.
+                var binEffects = gam.GetBinEffects(i);
+
+                // Now, write the function to the console. The function is a set of bins, and the corresponding
+                // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
+                Console.WriteLine($"Feature{i}");
+                for (int j = 0; j < binUpperBounds.Count; j++)
+                    Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+            }
+
+            // Expected output (exact values may differ between runs, since the MLContext above is created without a fixed seed):
+            //  Average prediction: 0.82
+            //
+            //  Feature0
+            //  x < -0.44 => 0.286
+            //  x < -0.38 => 0.225
+            //  x < -0.32 => 0.048
+            //  x < -0.26 => -0.110
+            //  x < -0.20 => -0.116
+            //  x < 0.18 => -0.143
+            //  x < 0.25 => -0.115
+            //  x < 0.31 => -0.005
+            //  x < 0.37 => 0.097
+            //  x < 0.44 => 0.263
+            //  x < ∞ => 0.284
+            //
+            //  Feature1
+            //  x < 0.00 => -0.350
+            //  x < 0.24 => 0.875
+            //  x < 0.31 => -0.138
+            //  x < ∞ => -0.188
+
+            // Let's consider this output. To score a given example, we look up the first bin where the inequality
+            // is satisfied for the feature value. We can look at the whole function to get a sense for how the
+            // model responds to the variable on a global level.
+            // The model can be seen to reconstruct the parabolic and step-wise functions, shifted with respect to the average
+            // expected output over the training set. Very few bins are used to model the second feature because the GAM
+            // discards unchanged bins to create smaller models.
+            // One last thing to notice is that these feature functions can be noisy. While we know that Feature0 (the parabola)
+            // should be symmetric, this is not perfectly captured in the model. This is due to noise in the data. Common practice
+            // is to use resampling methods to estimate a confidence interval at each bin. This will help to determine if the
+            // effect is real or just sampling noise. See for example:
+            // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model
+            // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>.
+        }
+
+        private class Data // One example: a label plus a two-element feature vector.
+        {
+            public bool Label { get; set; } // Binary target; assigned in GenerateData.
+
+            [VectorType(2)]
+            public float[] Features { get; set; } // Features[0] feeds Parabola, Features[1] feeds SimplePiecewise.
+        }
+
+        /// <summary>
+        /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. The label depends on Features[0] through
+        /// a parabola centered around 0, and on Features[1] through a simple piecewise function.
+        /// </summary>
+        /// <param name="numExamples">The number of examples to generate.</param>
+        /// <param name="seed">The seed for the random number generator used to produce data.</param>
+        /// <returns>A lazily evaluated sequence of <paramref name="numExamples"/> generated examples.</returns>
+        private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
+        {
+            var rng = new Random(seed); // Runs again on every enumeration, so each pass replays the same data.
+            float centeredFloat() => (float)(rng.NextDouble() - 0.5); // Random value centered around zero.
+            for (int i = 0; i < numExamples; i++)
+            {
+                // Generate random, uncoupled features.
+                var data = new Data {
+                    Features = new float[2] { centeredFloat(), centeredFloat() }
+                };
+                // Sum both shape functions and centered noise, then threshold the sigmoid at 0.5 to get the label.
+                data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5;
+
+                yield return data;
+            }
+        }
+
+        private static float Parabola(float x) => x * x; // Shape function applied to Features[0]: x squared.
+
+        private static float SimplePiecewise(float x) // Shape function applied to Features[1].
+        {
+            if (x < 0)
+                return 0;
+            else if (x < 0.25) // Only the half-open interval [0, 0.25) maps to 1.
+                return 1;
+            else
+                return 0;
+        }
+
+        private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-1 * x)); // Logistic function 1 / (1 + e^-x).
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
new file mode 100644
index 0000000000..e4a408a3ae
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
@@ -0,0 +1,163 @@
+﻿using System;
+using System.Collections.Generic;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using Microsoft.ML.Trainers.FastTree;
+
+namespace Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class GamWithOptions
+    {
+        // This example requires installing the additional NuGet package
+        // <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create the dataset.
+            var samples = GenerateData();
+
+            // Convert the dataset to an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Split the data; the held-out test partition is used as the validation set below.
+            var dataSets = mlContext.Data.TrainTestSplit(data);
+            var trainSet = dataSets.TrainSet;
+            var validSet = dataSets.TestSet;
+
+            // Create a GAM trainer.
+            // Use a small number of bins for this example. The setting below means for each feature,
+            // we divide its range into 16 discrete regions for the training process. Note that these
+            // regions are not evenly spaced, and that the final model may contain fewer bins, as
+            // neighboring bins with identical values will be combined. In general, we recommend using
+            // at least the default number of bins, as a small number of bins limits the capacity of
+            // the model.
+            // Also, set the learning rate to half the default to slow down the gradient descent, and
+            // double the number of iterations to compensate.
+            var trainer = mlContext.BinaryClassification.Trainers.Gam(
+                new GamBinaryTrainer.Options {
+                    NumberOfIterations = 19000,
+                    MaximumBinCountPerFeature = 16,
+                    LearningRate = 0.001
+                });
+
+            // Fit the model using both the training and validation sets. GAM can use a technique called
+            // pruning to tune the model to the validation set after training to improve generalization.
+            var model = trainer.Fit(trainSet, validSet);
+
+            // Extract the model parameters.
+            var gam = model.Model.SubModel;
+
+            // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
+            // and potentially learn about our dataset.
+            // First, we will look at the bias; the bias represents the average prediction for the training data.
+            Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
+
+            // Now look at the shape functions that the model has learned. Similar to a linear model, we have
+            // one response per feature, and they are independent. Unlike a linear model, this response is a
+            // generic function instead of a line. Because we have included a bias term, each feature response
+            // represents the deviation from the average prediction as a function of the feature value.
+            for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
+            {
+                // Print a blank line to separate the features.
+                Console.WriteLine();
+
+                // Get the bin upper bounds for the feature.
+                var binUpperBounds = gam.GetBinUpperBounds(i);
+
+                // Get the bin effects; these are the function values for each bin.
+                var binEffects = gam.GetBinEffects(i);
+
+                // Now, write the function to the console. The function is a set of bins, and the corresponding
+                // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
+                Console.WriteLine($"Feature{i}");
+                for (int j = 0; j < binUpperBounds.Count; j++)
+                    Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+            }
+
+            // Expected output (exact values may differ between runs, since the MLContext above is created without a fixed seed):
+            //  Average prediction: 0.82
+            //
+            //  Feature0
+            //  x < -0.44 => 0.286
+            //  x < -0.38 => 0.225
+            //  x < -0.32 => 0.048
+            //  x < -0.26 => -0.110
+            //  x < -0.20 => -0.116
+            //  x < 0.18 => -0.143
+            //  x < 0.25 => -0.115
+            //  x < 0.31 => -0.005
+            //  x < 0.37 => 0.097
+            //  x < 0.44 => 0.263
+            //  x < ∞ => 0.284
+            //
+            //  Feature1
+            //  x < 0.00 => -0.350
+            //  x < 0.24 => 0.875
+            //  x < 0.31 => -0.138
+            //  x < ∞ => -0.188
+
+            // Let's consider this output. To score a given example, we look up the first bin where the inequality
+            // is satisfied for the feature value. We can look at the whole function to get a sense for how the
+            // model responds to the variable on a global level.
+            // The model can be seen to reconstruct the parabolic and step-wise functions, shifted with respect to the average
+            // expected output over the training set. Very few bins are used to model the second feature because the GAM
+            // discards unchanged bins to create smaller models.
+            // One last thing to notice is that these feature functions can be noisy. While we know that Feature0 (the parabola)
+            // should be symmetric, this is not perfectly captured in the model. This is due to noise in the data. Common practice
+            // is to use resampling methods to estimate a confidence interval at each bin. This will help to determine if the
+            // effect is real or just sampling noise. See for example:
+            // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model
+            // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>.
+        }
+
+        private class Data // One example: a label plus a two-element feature vector.
+        {
+            public bool Label { get; set; } // Binary target; assigned in GenerateData.
+
+            [VectorType(2)]
+            public float[] Features { get; set; } // Features[0] feeds Parabola, Features[1] feeds SimplePiecewise.
+        }
+
+        /// <summary>
+        /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. The label depends on Features[0] through
+        /// a parabola centered around 0, and on Features[1] through a simple piecewise function.
+        /// </summary>
+        /// <param name="numExamples">The number of examples to generate.</param>
+        /// <param name="seed">The seed for the random number generator used to produce data.</param>
+        /// <returns>A lazily evaluated sequence of <paramref name="numExamples"/> generated examples.</returns>
+        private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
+        {
+            var rng = new Random(seed); // Runs again on every enumeration, so each pass replays the same data.
+            float centeredFloat() => (float)(rng.NextDouble() - 0.5); // Random value centered around zero.
+            for (int i = 0; i < numExamples; i++)
+            {
+                // Generate random, uncoupled features.
+                var data = new Data
+                {
+                    Features = new float[2] { centeredFloat(), centeredFloat() }
+                };
+                // Sum both shape functions and centered noise, then threshold the sigmoid at 0.5 to get the label.
+                data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5;
+
+                yield return data;
+            }
+        }
+
+        private static float Parabola(float x) => x * x; // Shape function applied to Features[0]: x squared.
+
+        private static float SimplePiecewise(float x) // Shape function applied to Features[1].
+        {
+            if (x < 0)
+                return 0;
+            else if (x < 0.25) // Only the half-open interval [0, 0.25) maps to 1.
+                return 1;
+            else
+                return 0;
+        }
+
+        private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-1 * x)); // Logistic function 1 / (1 + e^-x).
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
index dd8107452a..b070dfbda1 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs
@@ -1,7 +1,7 @@
 ﻿using System;
-using System.Linq;
+using System.Collections.Generic;
 using Microsoft.ML;
-using Microsoft.ML.SamplesUtils;
+using Microsoft.ML.Data;
 
 namespace Samples.Dynamic.Trainers.Regression
 {
@@ -14,93 +14,145 @@ public static void Example()
             // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
             // as a catalog of available operations and as the source of randomness.
             var mlContext = new MLContext();
+
+            // Create the dataset.
+            var samples = GenerateData();
+
+            // Convert the dataset to an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Create training and validation sets.
+            var dataSets = mlContext.Data.TrainTestSplit(data);
+            var trainSet = dataSets.TrainSet;
+            var validSet = dataSets.TestSet;
+
+            // Create a GAM trainer.
+            // Use a small number of bins for this example. The setting below means for each feature, 
+            // we divide its range into 16 discrete regions for the training process. Note that these
+            // regions are not evenly spaced, and that the final model may contain fewer bins, as
+            // neighboring bins with identical values will be combined. In general, we recommend using
+            // at least the default number of bins, as a small number of bins limits the capacity of
+            // the model.
+            var trainer = mlContext.Regression.Trainers.Gam(maximumBinCountPerFeature: 16);
+
+            // Fit the model using both the training and validation sets. GAM can use a technique called
+            // pruning to tune the model to the validation set after training to improve generalization.
+            var model = trainer.Fit(trainSet, validSet);
             
-            // Read the Housing regression dataset
-            var data = DatasetUtils.LoadHousingRegressionDataset(mlContext);
-
-            var labelName = "MedianHomeValue";
-            var featureNames = data.Schema
-                .Select(column => column.Name) // Get the column names
-                .Where(name => name != labelName) // Drop the Label
-                .ToArray();
-
-            // Create a pipeline.
-            var pipeline =
-                // Concatenate the features to create a Feature vector.
-                mlContext.Transforms.Concatenate("Features", featureNames)
-                // Append a GAM regression trainer, setting the "MedianHomeValue" column as the label of the dataset,
-                // the "Features" column produced by concatenation as the features column,
-                // and use a small number of bins to make it easy to visualize in the console window.
-                // For real applications, it is recommended to start with the default number of bins.
-                .Append(mlContext.Regression.Trainers.Gam(labelColumnName: labelName, featureColumnName: "Features", maximumBinCountPerFeature: 16));
-
-            // Train the pipeline.
-            var trainedPipeline = pipeline.Fit(data);
-
-            // Extract the model from the pipeline.
-            var gamModel = trainedPipeline.LastTransformer.Model;
-
-            // Now investigate the bias and shape functions of the GAM model.
-            // The bias represents the average prediction for the training data.
-            Console.WriteLine($"Average predicted cost: {gamModel.Bias:0.00}");
+            // Extract the model parameters.
+            var gam = model.Model;
+
+            // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
+            // and potentially learn about our dataset.
+            // First, we will look at the bias; the bias represents the average prediction for the training data.
+            Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
+
+            // Now look at the shape functions that the model has learned. Similar to a linear model, we have
+            // one response per feature, and they are independent. Unlike a linear model, this response is a 
+            // generic function instead of a line. Because we have included a bias term, each feature response 
+            // represents the deviation from the average prediction as a function of the feature value.
+            for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
+            {
+                // Break a line.
+                Console.WriteLine();
+
+                // Get the bin upper bounds for the feature.
+                var binUpperBounds = gam.GetBinUpperBounds(i);
+
+                // Get the bin effects; these are the function values for each bin.
+                var binEffects = gam.GetBinEffects(i);
+
+                // Now, write the function to the console. The function is a set of bins, and the corresponding
+                // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
+                Console.WriteLine($"Feature{i}");
+                for (int j = 0; j < binUpperBounds.Count; j++)
+                    Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+            }
 
             // Expected output:
-            //   Average predicted cost: 22.53
-
-            // Let's take a look at the features that the model built. Similar to a linear model, we have
-            // one response per feature. Unlike a linear model, this response is a function instead of a line.
-            // Each feature response represents the deviation from the average prediction as a function of the 
-            // feature value.
-
-            // Let's investigate the TeacherRatio variable. This is the ratio of students to teachers,
-            // so the higher it is, the more students a teacher has in their classroom.
-            // First, let's get the index of the variable we want to look at.
-            var studentTeacherRatioIndex = featureNames.ToList().FindIndex(str => str.Equals("TeacherRatio"));
-
-            // Next, let's get the array of histogram bin upper bounds from the model for this feature.
-            // For each feature, the shape function is calculated at `MaxBins` locations along the range of 
-            // values that the feature takes, and the resulting shape function can be seen as a histogram of
-            // effects.
-            var teacherRatioBinUpperBounds = gamModel.GetBinUpperBounds(studentTeacherRatioIndex);
-            // And the array of bin effects; these are the effect size for each bin.
-            var teacherRatioBinEffects = gamModel.GetBinEffects(studentTeacherRatioIndex);
-
-            // Now, write the function to the console. The function is a set of bins, and the corresponding
-            // function values. You can think of GAMs as building a bar-chart lookup table.
-            Console.WriteLine("Student-Teacher Ratio");
-            for (int i = 0; i < teacherRatioBinUpperBounds.Count; i++)
-                Console.WriteLine($"x < {teacherRatioBinUpperBounds[i]:0.00} => {teacherRatioBinEffects[i]:0.000}");
-
-            //  Expected output:
-            //    Student-Teacher Ratio
-            //    x < 14.55 =>  2.105
-            //    x < 14.75 =>  2.326
-            //    x < 15.40 =>  0.903
-            //    x < 16.50 =>  0.651
-            //    x < 17.15 =>  0.587
-            //    x < 17.70 =>  0.624
-            //    x < 17.85 =>  0.684
-            //    x < 18.35 => -0.315
-            //    x < 18.55 => -0.542
-            //    x < 18.75 => -0.083
-            //    x < 19.40 => -0.442
-            //    x < 20.55 => -0.649
-            //    x < 21.05 => -1.579
-            //    x <   ∞   =>  0.318
+            //  Average prediction: 1.33
+            //
+            //  Feature0
+            //  x < -0.44 => 0.128
+            //  x < -0.38 => 0.066
+            //  x < -0.32 => 0.040
+            //  x < -0.26 => -0.006
+            //  x < -0.20 => -0.035
+            //  x < -0.13 => -0.050
+            //  x < 0.06 => -0.077
+            //  x < 0.12 => -0.075
+            //  x < 0.18 => -0.052
+            //  x < 0.25 => -0.031
+            //  x < 0.31 => -0.002
+            //  x < 0.37 => 0.040
+            //  x < 0.44 => 0.083
+            //  x < ∞ => 0.123
+            //
+            //  Feature1
+            //  x < 0.00 => -0.245
+            //  x < 0.06 => 0.671
+            //  x < 0.24 => 0.723
+            //  x < 0.31 => -0.141
+            //  x < 0.37 => -0.241
+            //  x < ∞ => -0.248
 
             // Let's consider this output. To score a given example, we look up the first bin where the inequality
             // is satisfied for the feature value. We can look at the whole function to get a sense for how the
-            // model responds to the variable on a global level. For the student-teacher-ratio variable, we can see
-            // that smaller class sizes are predictive of a higher house value, while student-teacher ratios higher 
-            // than about 18 lead to lower predictions in house value. This makes intuitive sense, as smaller class 
-            // sizes are desirable and also indicative of better-funded schools, which could make buyers likely to
-            // pay more for the house.
-            
-            // Another thing to notice is that these feature functions can be noisy. See student-teacher ratios > 21.05.
-            // Common practice is to use resampling methods to estimate a confidence interval at each bin. This will
-            // help to determine if the effect is real or just sampling noise. See for example 
+            // model responds to the variable on a global level.
+            // The model can be seen to reconstruct the parabolic and step-wise functions, shifted with respect to the average
+            // expected output over the training set. Very few bins are used to model the second feature because the GAM
+            // discards unchanged bins to create smaller models.
+            // One last thing to notice is that these feature functions can be noisy. While we know that Feature0 (the parabola)
+            // should be symmetric, this is not perfectly captured in the model. This is due to noise in the data. Common practice is to use
+            // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is 
+            // real or just sampling noise. See for example:
             // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model 
             // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>."
         }
+
+        private class Data // One example: a label plus a two-element feature vector.
+        {
+            public float Label { get; set; } // Regression target; assigned in GenerateData.
+
+            [VectorType(2)]
+            public float[] Features { get; set; } // Features[0] feeds Parabola, Features[1] feeds SimplePiecewise.
+        }
+
+        /// <summary>
+        /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. The label depends on Features[0] through
+        /// a parabola centered around 0, and on Features[1] through a simple piecewise function.
+        /// </summary>
+        /// <param name="numExamples">The number of examples to generate.</param>
+        /// <param name="seed">The seed for the random number generator used to produce data.</param>
+        /// <returns>A lazily evaluated sequence of <paramref name="numExamples"/> generated examples.</returns>
+        private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
+        {
+            float bias = 1.0f; // Constant offset added to every label.
+            var rng = new Random(seed); // Runs again on every enumeration, so each pass replays the same data.
+            float centeredFloat() => (float)(rng.NextDouble() - 0.5); // Random value centered around zero.
+            for (int i = 0; i < numExamples; i++)
+            {
+                // Generate random, uncoupled features.
+                var data = new Data {
+                    Features = new float[2] { centeredFloat(), centeredFloat() }
+                };
+                // Compute the label as the bias plus both shape functions plus centered noise.
+                data.Label = bias + Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat();
+
+                yield return data;
+            }
+        }
+
+        private static float Parabola(float x) => x * x; // Shape function applied to Features[0]: x squared.
+
+        private static float SimplePiecewise(float x) // Shape function applied to Features[1].
+        {
+            if (x < 0)
+                return 0;
+            else if (x < 0.25) // Only the half-open interval [0, 0.25) maps to 1.
+                return 1;
+            else
+                return 0;
+        }
     }
 }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
index 33617b2d94..6c973814fd 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs
@@ -1,6 +1,5 @@
 ﻿using System;
 using System.Collections.Generic;
-using System.Linq;
 using Microsoft.ML;
 using Microsoft.ML.Data;
 using Microsoft.ML.Trainers.FastTree;
@@ -15,92 +14,182 @@ public static void Example()
         {
             // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
             // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this example to make outputs deterministic.
-            var mlContext = new MLContext(seed: 0);
-
-            // Create a list of training examples.
-            var examples = GenerateRandomDataPoints(1000);
-
-            // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
-            var trainingData = mlContext.Data.LoadFromEnumerable(examples);
-
-            // Define trainer options.
-            var options = new GamRegressionTrainer.Options
+            var mlContext = new MLContext();
+
+            // Create the dataset.
+            var samples = GenerateData();
+
+            // Convert the dataset to an IDataView.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Create training and validation sets.
+            var dataSets = mlContext.Data.TrainTestSplit(data);
+            var trainSet = dataSets.TrainSet;
+            var validSet = dataSets.TestSet;
+
+            // Create a GAM trainer.
+            // Use a small number of bins for this example. The setting below means for each feature, 
+            // we divide its range into 16 discrete regions for the training process. Note that these
+            // regions are not evenly spaced, and that the final model may contain fewer bins, as
+            // neighboring bins with identical values will be combined. In general, we recommend using
+            // at least the default number of bins, as a small number of bins limits the capacity of
+            // the model.
+            // Also, change the pruning metrics to use the mean absolute error for pruning.
+            var trainer = mlContext.Regression.Trainers.Gam(
+                new GamRegressionTrainer.Options {
+                    MaximumBinCountPerFeature = 16,
+                    PruningMetrics = 1
+                });
+
+            // Fit the model using both the training and validation sets. GAM can use a technique called
+            // pruning to tune the model to the validation set after training to improve generalization.
+            var model = trainer.Fit(trainSet, validSet);
+
+            // Extract the model parameters.
+            var gam = model.Model;
+
+            // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
+            // and potentially learn about our dataset.
+            // First, we will look at the bias; the bias represents the average prediction for the training data.
+            Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
+
+            // Let's take a look at the features that the model built. Similar to a linear model, we have
+            // one response per feature. Unlike a linear model, this response is a function instead of a line.
+            // Each feature response represents the deviation from the average prediction as a function of the 
+            // feature value.
+            for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
             {
-                // The entropy (regularization) coefficient.
-                EntropyCoefficient = 0.3,
-                // Reduce the number of iterations to 50.
-                NumberOfIterations = 50
-            };
-
-            // Define the trainer.
-            var pipeline = mlContext.Regression.Trainers.Gam(options);
-
-            // Train the model.
-            var model = pipeline.Fit(trainingData);
+                // Break a line.
+                Console.WriteLine();
 
-            // Create testing examples. Use different random seed to make it different from training data.
-            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+                // Get the bin upper bounds for the feature.
+                var binUpperBounds = gam.GetBinUpperBounds(i);
 
-            // Run the model on test data set.
-            var transformedTestData = model.Transform(testData);
+                // Get the bin effects; these are the function values for each bin.
+                var binEffects = gam.GetBinEffects(i);
 
-            // Convert IDataView object to a list.
-            var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
-
-            // Look at 5 predictions
-            foreach (var p in predictions.Take(5))
-                Console.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}");
+                // Now, write the function to the console. The function is a set of bins, and the corresponding
+                // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
+                Console.WriteLine($"Feature{i}");
+                for (int j = 0; j < binUpperBounds.Count; j++)
+                    Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+            }
 
             // Expected output:
-            //   Label: 0.985, Prediction: 0.841
-            //   Label: 0.155, Prediction: 0.187
-            //   Label: 0.515, Prediction: 0.496
-            //   Label: 0.566, Prediction: 0.467
-            //   Label: 0.096, Prediction: 0.144
+            //  Average prediction: 1.33
+            //
+            //  Feature0
+            //  x < -0.44 => 0.128
+            //  x < -0.38 => 0.066
+            //  x < -0.32 => 0.040
+            //  x < -0.26 => -0.006
+            //  x < -0.20 => -0.035
+            //  x < -0.13 => -0.050
+            //  x < 0.06 => -0.077
+            //  x < 0.12 => -0.075
+            //  x < 0.18 => -0.052
+            //  x < 0.25 => -0.031
+            //  x < 0.31 => -0.002
+            //  x < 0.37 => 0.040
+            //  x < 0.44 => 0.083
+            //  x < ∞ => 0.123
+            //
+            //  Feature1
+            //  x < 0.00 => -0.245
+            //  x < 0.06 => 0.671
+            //  x < 0.24 => 0.723
+            //  x < 0.31 => -0.141
+            //  x < 0.37 => -0.241
+            //  x < ∞ => -0.248
+            
+            // Let's consider this output. To score a given example, we look up the first bin where the inequality
+            // is satisfied for the feature value. We can look at the whole function to get a sense for how the
+            // model responds to the variable on a global level.
+            // The model can be seen to reconstruct the parabolic and step-wise functions, shifted with respect to the average
+            // expected output over the training set. Very few bins are used to model the second feature because the GAM model
+            // discards unchanged bins to create smaller models.
+            // One last thing to notice is that these feature functions can be noisy. While we know that Feature0 should be
+            // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use 
+            // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is 
+            // real or just sampling noise. See for example:
+            // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model 
+            // Distillation." <a href='https://arxiv.org/abs/1710.06169'>arXiv:1710.06169</a>.
+        }
 
-            // Evaluate the overall metrics
-            var metrics = mlContext.Regression.Evaluate(transformedTestData);
-            Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+//        Feature0
+//x< -0.44 => 0.131
+//x< -0.38 => 0.067
+//x< -0.32 => 0.041
+//x< -0.26 => -0.005
+//x< -0.20 => -0.035
+//x< -0.13 => -0.050
+//x< -0.07 => -0.079
+//x< -0.01 => -0.083
+//x< 0.06 => -0.079
+//x< 0.12 => -0.075
+//x< 0.18 => -0.052
+//x< 0.25 => -0.030
+//x< 0.31 => -0.002
+//x< 0.37 => 0.041
+//x< 0.44 => 0.084
+//x< ∞ => 0.126
+
+//Feature1
+//x< -0.37 => -0.255
+//x< -0.25 => -0.247
+//x< 0.00 => -0.249
+//x< 0.06 => 0.671
+//x< 0.12 => 0.743
+//x< 0.24 => 0.746
+//x< 0.31 => -0.143
+//x< 0.37 => -0.245
+//x< 0.43 => -0.261
+//x< ∞ => -0.257
+
+        private class Data // One labeled example consumed by the GAM regression sample.
+        {
+            public float Label { get; set; } // Target value; GenerateData computes it from the features plus noise.
 
-            // Expected output:
-            //   Mean Absolute Error: 0.06
-            //   Mean Squared Error: 0.01
-            //   Root Mean Squared Error: 0.08
-            //   RSquared: 0.93
+            [VectorType(2)] // Marks Features as a vector of known size 2 (GenerateData builds a two-element array).
+            public float[] Features { get; set; }
         }
 
-        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0)
+        /// <summary>
+        /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. The label depends on Features[0] through
+        /// a parabola centered around 0 and on Features[1] through a simple piecewise (step) function, plus random noise.
+        /// </summary>
+        /// <param name="numExamples">The number of examples to generate.</param>
+        /// <param name="seed">The seed for the random number generator used to produce data.</param>
+        /// <returns>A lazily evaluated sequence of <paramref name="numExamples"/> generated examples.</returns>
+        private static IEnumerable<Data> GenerateData(int numExamples = 25000, int seed = 1)
         {
-            var random = new Random(seed);
-            float randomFloat() => (float)random.NextDouble();
-            for (int i = 0; i < count; i++)
+            float bias = 1.0f;
+            var rng = new Random(seed);
+            float centeredFloat() => (float)(rng.NextDouble() - 0.5);
+            for (int i = 0; i < numExamples; i++)
             {
-                var label = randomFloat();
-                yield return new DataPoint
+                // Draw two independent features; each is NextDouble() - 0.5, so it is centered on 0.
+                var data = new Data
                 {
-                    Label = label,
-                    // Create random features that are correlated with label.
-                    Features = Enumerable.Repeat(label, 50).Select(x => x + randomFloat()).ToArray()
+                    Features = new float[2] { centeredFloat(), centeredFloat() }
                 };
+                // Label = bias + Parabola(Features[0]) + SimplePiecewise(Features[1]) + one more centered noise draw.
+                data.Label = bias + Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat();
+
+                yield return data;
             }
         }
 
-        // Example with label and 50 feature values. A data set is a collection of such examples.
-        private class DataPoint
-        {
-            public float Label { get; set; }
-            [VectorType(50)]
-            public float[] Features { get; set; }
-        }
+        private static float Parabola(float x) => x * x; // Shape function applied to Features[0]: x squared.
 
-        // Class used to capture predictions.
-        private class Prediction
+        private static float SimplePiecewise(float x)
         {
-            // Original label.
-            public float Label { get; set; }
-            // Predicted score from the trainer.
-            public float Score { get; set; }
+            // Step function: 1 on the half-open interval [0, 0.25), 0 everywhere else.
+            bool onStep = x >= 0 && x < 0.25;
+            if (onStep)
+                return 1;
+
+            return 0;
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
index 02ab57a7fa..7031a673da 100644
--- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
+++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
@@ -173,6 +173,13 @@ public static FastTreeRankingTrainer FastTree(this RankingCatalog.RankingTrainer
         /// <param name="numberOfIterations">The number of iterations to use in learning the features.</param>
         /// <param name="maximumBinCountPerFeature">The maximum number of bins to use to approximate features.</param>
         /// <param name="learningRate">The learning rate. GAMs work best with a small learning rate.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[Gam](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static GamBinaryTrainer Gam(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             string labelColumnName = DefaultColumnNames.Label,
             string featureColumnName = DefaultColumnNames.Features,
@@ -191,6 +198,13 @@ public static GamBinaryTrainer Gam(this BinaryClassificationCatalog.BinaryClassi
         /// </summary>
         /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
         /// <param name="options">Trainer options.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[Gam](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static GamBinaryTrainer Gam(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             GamBinaryTrainer.Options options)
         {
@@ -212,7 +226,7 @@ public static GamBinaryTrainer Gam(this BinaryClassificationCatalog.BinaryClassi
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[GamRegression](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs)]
+        /// [!code-csharp[Gam](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs)]
         /// ]]>
         /// </format>
         /// </example>
@@ -237,7 +251,7 @@ public static GamRegressionTrainer Gam(this RegressionCatalog.RegressionTrainers
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[GamRegression](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs)]
+        /// [!code-csharp[Gam](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs)]
         /// ]]>
         /// </format>
         /// </example>