diff --git a/src/Microsoft.ML.Core/Utilities/ReservoirSampler.cs b/src/Microsoft.ML.Core/Utilities/ReservoirSampler.cs index a755788fb4..b8006bd943 100644 --- a/src/Microsoft.ML.Core/Utilities/ReservoirSampler.cs +++ b/src/Microsoft.ML.Core/Utilities/ReservoirSampler.cs @@ -47,7 +47,7 @@ public interface IReservoirSampler /// This class produces a sample without replacement from a stream of data of type . /// It is instantiated with a delegate that gets the next data point, and builds a reservoir in one pass by calling /// for every data point in the stream. In case the next data point does not get 'picked' into the reservoir, the delegate is not invoked. - /// Sampling is done according to the algorithm in this paper: . + /// Sampling is done according to the algorithm in this paper: http://epubs.siam.org/doi/pdf/10.1137/1.9781611972740.53. /// public sealed class ReservoirSamplerWithoutReplacement : IReservoirSampler { @@ -120,7 +120,7 @@ public IEnumerable GetSample() /// This class produces a sample with replacement from a stream of data of type . /// It is instantiated with a delegate that gets the next data point, and builds a reservoir in one pass by calling /// for every data point in the stream. In case the next data point does not get 'picked' into the reservoir, the delegate is not invoked. - /// Sampling is done according to the algorithm in this paper: . + /// Sampling is done according to the algorithm in this paper: http://epubs.siam.org/doi/pdf/10.1137/1.9781611972740.53. /// public sealed class ReservoirSamplerWithReplacement : IReservoirSampler { diff --git a/src/Microsoft.ML.Data/Evaluators/AucAggregator.cs b/src/Microsoft.ML.Data/Evaluators/AucAggregator.cs index f45aacd58e..342e1d3529 100644 --- a/src/Microsoft.ML.Data/Evaluators/AucAggregator.cs +++ b/src/Microsoft.ML.Data/Evaluators/AucAggregator.cs @@ -408,7 +408,7 @@ public UnweightedAuPrcAggregator(IRandom rand, int reservoirSize) /// /// Compute the AUPRC using the "lower trapezoid" estimator, as described in the paper - /// . + /// http://www.ecmlpkdd2013.org/wp-content/uploads/2013/07/aucpr_2013ecml_corrected.pdf. /// protected override Double ComputeWeightedAuPrcCore(out Double unweighted) { @@ -482,7 +482,7 @@ public WeightedAuPrcAggregator(IRandom rand, int reservoirSize) /// /// Compute the AUPRC using the "lower trapezoid" estimator, as described in the paper - /// . + /// http://www.ecmlpkdd2013.org/wp-content/uploads/2013/07/aucpr_2013ecml_corrected.pdf. /// protected override Double ComputeWeightedAuPrcCore(out Double unweighted) { diff --git a/src/Microsoft.ML.Data/Transforms/NAFilter.cs b/src/Microsoft.ML.Data/Transforms/NAFilter.cs index 7b94ff1e07..c8515291f3 100644 --- a/src/Microsoft.ML.Data/Transforms/NAFilter.cs +++ b/src/Microsoft.ML.Data/Transforms/NAFilter.cs @@ -26,6 +26,7 @@ namespace Microsoft.ML.Runtime.Data { + /// public sealed class NAFilter : FilterBase { private static class Defaults diff --git a/src/Microsoft.ML.Data/Transforms/TermTransform.cs b/src/Microsoft.ML.Data/Transforms/TermTransform.cs index 7591179588..ea2248716e 100644 --- a/src/Microsoft.ML.Data/Transforms/TermTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/TermTransform.cs @@ -29,14 +29,14 @@ namespace Microsoft.ML.Runtime.Data { - /// - /// TermTransform builds up term vocabularies (dictionaries). - /// Notes: - /// * Each column builds/uses exactly one "vocabulary" (dictionary). - /// * Output columns are KeyType-valued. - /// * The Key value is the one-based index of the item in the dictionary.
- /// * Not found is assigned the value zero. - /// + + // TermTransform builds up term vocabularies (dictionaries). + // Notes: + // * Each column builds/uses exactly one "vocabulary" (dictionary). + // * Output columns are KeyType-valued. + // * The Key value is the one-based index of the item in the dictionary. + // * Not found is assigned the value zero. + /// public sealed partial class TermTransform : OneToOneTransformBase, ITransformTemplate { public abstract class ColumnBase : OneToOneColumn diff --git a/src/Microsoft.ML.Data/Transforms/doc.xml b/src/Microsoft.ML.Data/Transforms/doc.xml new file mode 100644 index 0000000000..a3d4ba9f5e --- /dev/null +++ b/src/Microsoft.ML.Data/Transforms/doc.xml @@ -0,0 +1,54 @@ + + + + + + Removes missing values from vector type columns. + + + This transform removes the entire row if any of the input columns have a missing value in that row. + This preprocessing is required for many ML algorithms that cannot work with missing values. + Useful if any missing entry invalidates the entire row. + If the is set to true, this transform does the exact opposite: + it keeps only the rows that have missing values. + + + + + + + pipeline.Add(new MissingValuesRowDropper("Column1")); + + + + + + + Converts input values (words, numbers, etc.) to an index in a dictionary. + + + The TextToKeyConverter transform builds up term vocabularies (dictionaries). + The TextToKeyConverter and the are the two primary mechanisms by which raw input is transformed into keys. + If multiple columns are used, each column builds/uses exactly one vocabulary. + The output columns are KeyType-valued. + The Key value is the one-based index of the item in the dictionary. + If the key is not found in the dictionary, it is assigned the missing value indicator. + This dictionary mapping values to keys is most commonly learnt from the unique values in input data, + but can be defined through other means: either with the mapping defined directly on the command line, or as loaded from an external file.
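As an illustration of the mapping semantics described above, here is a minimal standalone C# sketch (the helper names are hypothetical and not part of the ML.NET API): a vocabulary assigns one-based keys in order of first appearance, and values absent from the vocabulary map to the missing value indicator, zero.

using System.Collections.Generic;

static class TermMappingSketch
{
    // Assign each distinct term a one-based key, in order of first appearance.
    public static Dictionary<string, uint> BuildVocabulary(IEnumerable<string> column)
    {
        var vocabulary = new Dictionary<string, uint>();
        foreach (var term in column)
        {
            if (!vocabulary.ContainsKey(term))
                vocabulary[term] = (uint)vocabulary.Count + 1;
        }
        return vocabulary;
    }

    // A term not found in the vocabulary is assigned the missing value indicator, 0.
    public static uint ToKey(Dictionary<string, uint> vocabulary, string term)
        => vocabulary.TryGetValue(term, out var key) ? key : 0;
}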
+ + + + + + + + pipeline.Add(new TextToKeyConverter(("Column", "OutColumn")) + { + Sort = TermTransformSortOrder.Occurrence + }); + + + + + + diff --git a/src/Microsoft.ML.FastTree/FastTreeArguments.cs b/src/Microsoft.ML.FastTree/FastTreeArguments.cs index e6274e3155..2c6fc02745 100644 --- a/src/Microsoft.ML.FastTree/FastTreeArguments.cs +++ b/src/Microsoft.ML.FastTree/FastTreeArguments.cs @@ -20,7 +20,7 @@ public interface IFastTreeTrainerFactory : IComponentFactory { } - /// + /// public sealed partial class FastTreeBinaryClassificationTrainer { [TlcModule.Component(Name = LoadNameValue, FriendlyName = UserNameValue, Desc = Summary)] diff --git a/src/Microsoft.ML.FastTree/FastTreeClassification.cs b/src/Microsoft.ML.FastTree/FastTreeClassification.cs index 4eca3e15fb..ac92867c27 100644 --- a/src/Microsoft.ML.FastTree/FastTreeClassification.cs +++ b/src/Microsoft.ML.FastTree/FastTreeClassification.cs @@ -100,7 +100,7 @@ public static IPredictorProducing Create(IHostEnvironment env, ModelLoadC public override PredictionKind PredictionKind => PredictionKind.BinaryClassification; } - /// + /// public sealed partial class FastTreeBinaryClassificationTrainer : BoostingFastTreeTrainerBase> { @@ -342,7 +342,8 @@ public static partial class FastTree Desc = FastTreeBinaryClassificationTrainer.Summary, UserName = FastTreeBinaryClassificationTrainer.UserNameValue, ShortName = FastTreeBinaryClassificationTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, FastTreeBinaryClassificationTrainer.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.FastTree/FastTreeRanking.cs b/src/Microsoft.ML.FastTree/FastTreeRanking.cs index 0e44553dee..cc246f25cd 100644 --- a/src/Microsoft.ML.FastTree/FastTreeRanking.cs +++ b/src/Microsoft.ML.FastTree/FastTreeRanking.cs @@ -38,7 +38,7 @@ namespace Microsoft.ML.Runtime.FastTree { - /// + /// public sealed partial class FastTreeRankingTrainer : BoostingFastTreeTrainerBase, IHasLabelGains { @@ -1096,7 +1096,8 @@ public static partial class FastTree Desc = FastTreeRankingTrainer.Summary, UserName = FastTreeRankingTrainer.UserNameValue, ShortName = FastTreeRankingTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.RankingOutput TrainRanking(IHostEnvironment env, FastTreeRankingTrainer.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.FastTree/FastTreeRegression.cs b/src/Microsoft.ML.FastTree/FastTreeRegression.cs index 1e78f4c473..287cfe9e1c 100644 --- a/src/Microsoft.ML.FastTree/FastTreeRegression.cs +++ b/src/Microsoft.ML.FastTree/FastTreeRegression.cs @@ -31,7 +31,7 @@ namespace Microsoft.ML.Runtime.FastTree { - /// + /// public sealed partial class FastTreeRegressionTrainer : BoostingFastTreeTrainerBase { public const string LoadNameValue = "FastTreeRegression"; @@ -445,7 +445,8 @@ public static partial class FastTree Desc = FastTreeRegressionTrainer.Summary, UserName = FastTreeRegressionTrainer.UserNameValue, ShortName = FastTreeRegressionTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, FastTreeRegressionTrainer.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.FastTree/FastTreeTweedie.cs b/src/Microsoft.ML.FastTree/FastTreeTweedie.cs index 
6c1b56b1eb..d02928884f 100644 --- a/src/Microsoft.ML.FastTree/FastTreeTweedie.cs +++ b/src/Microsoft.ML.FastTree/FastTreeTweedie.cs @@ -30,7 +30,7 @@ namespace Microsoft.ML.Runtime.FastTree // The Tweedie boosting model follows the mathematics established in: // Yang, Quan, and Zou. "Insurance Premium Prediction via Gradient Tree-Boosted Tweedie Compound Poisson Models." // https://arxiv.org/pdf/1508.06378.pdf - /// + /// public sealed partial class FastTreeTweedieTrainer : BoostingFastTreeTrainerBase { public const string LoadNameValue = "FastTreeTweedieRegression"; @@ -454,7 +454,7 @@ public static partial class FastTree Desc = FastTreeTweedieTrainer.Summary, UserName = FastTreeTweedieTrainer.UserNameValue, ShortName = FastTreeTweedieTrainer.ShortName, - XmlInclude = new [] { @"" })] + XmlInclude = new [] { @"" })] public static CommonOutputs.RegressionOutput TrainTweedieRegression(IHostEnvironment env, FastTreeTweedieTrainer.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index 612313fb93..512e79faf9 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -106,7 +106,7 @@ public static IPredictorProducing Create(IHostEnvironment env, ModelLoadC } } - /// + /// public sealed partial class FastForestClassification : RandomForestTrainerBase> { @@ -206,7 +206,8 @@ public static partial class FastForest Desc = FastForestClassification.Summary, UserName = FastForestClassification.UserNameValue, ShortName = FastForestClassification.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, FastForestClassification.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.FastTree/RandomForestRegression.cs b/src/Microsoft.ML.FastTree/RandomForestRegression.cs index 7c45af3e54..c580851534 100644 --- a/src/Microsoft.ML.FastTree/RandomForestRegression.cs +++ b/src/Microsoft.ML.FastTree/RandomForestRegression.cs @@ -137,7 +137,7 @@ public ISchemaBindableMapper CreateMapper(Double[] quantiles) } } - /// + /// public sealed partial class FastForestRegression : RandomForestTrainerBase { public sealed class Arguments : FastForestArgumentsBase @@ -277,7 +277,8 @@ public static partial class FastForest Desc = FastForestRegression.Summary, UserName = FastForestRegression.LoadNameValue, ShortName = FastForestRegression.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, FastForestRegression.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.FastTree/Training/Parallel/IParallelTraining.cs b/src/Microsoft.ML.FastTree/Training/Parallel/IParallelTraining.cs index 525e60947b..08ae6fb16f 100644 --- a/src/Microsoft.ML.FastTree/Training/Parallel/IParallelTraining.cs +++ b/src/Microsoft.ML.FastTree/Training/Parallel/IParallelTraining.cs @@ -33,20 +33,20 @@ public delegate void FindBestThresholdFromRawArrayFun(LeafSplitCandidates leafSp /// /// Interface used for parallel training. /// Mainly contains three parts: - /// 1. interactive with IO: , . + /// 1. interactive with IO: , . /// Data will be partitioned by rows in Data parallel and Voting Parallel. 
/// To speed up the find bin process, it lets different workers find bins for different features. /// Then it performs a global sync-up. /// In Feature parallel, every machine holds all the data, so this is unneeded. - /// 2. interactive with TreeLearner: , , , - /// , , , . + /// 2. interactive with TreeLearner: , , , + /// , , , . /// A full process is: - /// Use to alter local active features. - /// Use to check smaller leaf and larger leaf. - /// Use , and to interactive with Feature histograms. - /// Use to sync up global best split - /// Use to record global num_data in leaves. - /// 3. interactive with Application : . + /// Use to alter local active features. + /// Use to check smaller leaf and larger leaf. + /// Use , and to interact with Feature histograms. + /// Use to sync up the global best split. + /// Use to record global num_data in leaves. + /// 3. interactive with Application: . /// The output of the leaves is calculated by a Newton step ( - sum(first_order_gradients) / sum(second_order_gradients)). /// If data is partitioned by row, it needs a sync-up for these sums. /// So it needs to call this to get the real output of the leaves. diff --git a/src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs b/src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs index a7044d2502..577b012815 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs @@ -544,6 +544,7 @@ public ISchemaBoundMapper Bind(IHostEnvironment env, RoleMappedSchema schema) } } + /// public static class TreeEnsembleFeaturizerTransform { public sealed class Arguments : TrainAndScoreTransform.ArgumentsBase @@ -802,7 +803,11 @@ private static IDataView AppendLabelTransform(IHostEnvironment env, IChannel ch, public static partial class TreeFeaturize { - [TlcModule.EntryPoint(Name = "Transforms.TreeLeafFeaturizer", Desc = TreeEnsembleFeaturizerTransform.TreeEnsembleSummary, UserName = TreeEnsembleFeaturizerTransform.UserName, ShortName = TreeEnsembleFeaturizerBindableMapper.LoadNameShort)] + [TlcModule.EntryPoint(Name = "Transforms.TreeLeafFeaturizer", + Desc = TreeEnsembleFeaturizerTransform.TreeEnsembleSummary, + UserName = TreeEnsembleFeaturizerTransform.UserName, + ShortName = TreeEnsembleFeaturizerBindableMapper.LoadNameShort, + XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput Featurizer(IHostEnvironment env, TreeEnsembleFeaturizerTransform.ArgumentsForEntryPoint input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.FastTree/doc.xml b/src/Microsoft.ML.FastTree/doc.xml index 36f0b41f24..8678654182 100644 --- a/src/Microsoft.ML.FastTree/doc.xml +++ b/src/Microsoft.ML.FastTree/doc.xml @@ -1,5 +1,5 @@  - + @@ -24,14 +24,54 @@ The output of the ensemble produced by MART on a given instance is the sum of the tree outputs. - In case of a binary classification problem, the output is converted to a probability by using some form of calibration. - In case of a regression problem, the output is the predicted value of the function. - In case of a ranking problem, the instances are ordered by the output value of the ensemble. + In case of a binary classification problem, the output is converted to a probability by using some form of calibration. + In case of a regression problem, the output is the predicted value of the function. + In case of a ranking problem, the instances are ordered by the output value of the ensemble. - Wikipedia: Gradient boosting (Gradient tree boosting).
- Greedy function approximation: A gradient boosting machine.. - + For more information see: + + Wikipedia: Gradient boosting (Gradient tree boosting). + Greedy function approximation: A gradient boosting machine. + + + + + + new FastTreeRanker + { + SortingAlgorithm = "DescendingReverse", + OptimizationAlgorithm = BoostedTreeArgsOptimizationAlgorithmType.AcceleratedGradientDescent + } + + + + + + + new FastTreeRegressor + { + NumTrees = 200, + EarlyStoppingRule = new GLEarlyStoppingCriterion(), + LearningRates = 0.4f, + DropoutRate = 0.05f + } + + + + + + + new FastTreeBinaryClassifier + { + NumTrees = 100, + EarlyStoppingRule = new PQEarlyStoppingCriterion(), + LearningRates = 0.4f, + DropoutRate = 0.05f + } + + + @@ -48,17 +88,45 @@ They perform integrated feature selection and classification. They are resilient in the presence of noisy features. - Fast forest is a random forest implementation. + Fast forest is a random forest implementation. The model consists of an ensemble of decision trees. Each tree in a decision forest outputs a Gaussian distribution by way of prediction. An aggregation is performed over the ensemble of trees to find a Gaussian distribution closest to the combined distribution for all trees in the model. - This decision forest classifier consists of an ensemble of decision trees. - Generally, ensemble models provide better coverage and accuracy than single decision trees. - Each tree in a decision forest outputs a Gaussian distribution. - Wikipedia: Random forest - Quantile regression forest - From Stumps to Trees to Forests + This decision forest classifier consists of an ensemble of decision trees. + Generally, ensemble models provide better coverage and accuracy than single decision trees. + Each tree in a decision forest outputs a Gaussian distribution. + For more information, see: + + Wikipedia: Random forest + Quantile regression forest + From Stumps to Trees to Forests + + + + + new FastForestBinaryClassifier + { + NumTrees = 100, + NumLeaves = 50, + Calibrator = new FixedPlattCalibratorCalibratorTrainer() + } + + + + + + + new FastForestRegressor + { + NumTrees = 100, + NumLeaves = 50, + NumThreads = 5, + EntropyCoefficient = 0.3 + } + + + @@ -68,11 +136,57 @@ The Tweedie boosting model follows the mathematics established in Insurance Premium Prediction via Gradient Tree-Boosted Tweedie Compound Poisson Models, by Yang, Quan, and Zou. - For an introduction to Gradient Boosting, and more information, see: + For an introduction to Gradient Boosting, and more information, see: Wikipedia: Gradient boosting (Gradient tree boosting) Greedy function approximation: A gradient boosting machine + + + + Trains a tree ensemble, or loads it from a file, then maps a numeric feature vector + to three outputs: + + A vector containing the individual tree outputs of the tree ensemble. + A vector indicating the leaves that the feature vector falls on in the tree ensemble. + A vector indicating the paths that the feature vector falls on in the tree ensemble. + + If both a model file and a trainer are specified, the model file will be used. If neither is specified, + a default FastTree model will be trained. + This can handle key labels by training a regression model towards their optionally permuted indices. + + + In machine learning, it is a common and powerful approach to utilize an already trained model in the process of defining features. + One such example would be the use of a model's scores as features for downstream models.
For example, we might run clustering on the original features, + and use the cluster distances as the new feature set. + Instead of consuming the model's output, we could go deeper, and extract the 'intermediate outputs' that are used to produce the final score. + There are a number of famous or popular examples of this technique: + + A deep neural net trained on the ImageNet dataset, with the last layer removed, is commonly used to compute the 'projection' of the image into the 'semantic feature space'. + It is observed that the Euclidean distance in this space often correlates with the 'semantic similarity': that is, all pictures of pizza are located close together, + and far away from pictures of kittens. + A matrix factorization and/or LDA model is also often used to extract the 'latent topics' or 'latent features' associated with users and items. + The weights of the linear model are often used as a crude indicator of 'feature importance'. At the very minimum, the 0-weight features are not needed by the model, + and there's no reason to compute them. + + Tree featurizer uses the decision tree ensembles for feature engineering in the same fashion as above. + Let's assume that we've built a tree ensemble of 100 trees with 100 leaves each (it doesn't matter whether boosting was used or not in training). + If we associate each leaf of each tree with a sequential integer, we can, for every incoming example x, + produce an indicator vector L(x), where Li(x) = 1 if the example x 'falls' into the leaf #i, and 0 otherwise. + Thus, for every example x, we produce a 10000-valued vector L, with exactly 100 1s and the rest zeroes. + This 'leaf indicator' vector can be considered the ensemble-induced 'footprint' of the example. + The 'distance' between two examples in the L-space is actually a Hamming distance, and is equal to the number of trees that do not distinguish the two examples. + We could repeat the same thought process for the non-leaf, or internal, nodes of the trees (we know that each tree has exactly 99 of them in our 100-leaf example), + and produce another indicator vector, N (size 9900), for each example, indicating the 'trajectory' of each example through each of the trees. + The distance in the combined 19900-dimensional LN-space will be equal to the number of 'decisions' in all trees that 'agree' on the given pair of examples. + The TreeLeafFeaturizer is also producing the third vector, T, which is defined as Ti(x) = output of tree #i on example x. + + + + pipeline.Add(new TreeLeafFeaturizer()) + + + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index 3049e6a54c..3bab791fe1 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -28,7 +28,7 @@ namespace Microsoft.ML.Runtime.KMeans { - /// + /// public class KMeansPlusPlusTrainer : TrainerBase { public const string LoadNameValue = "KMeansPlusPlus"; @@ -207,7 +207,8 @@ private static int ComputeNumThreads(IHost host, int? 
argNumThreads) Desc = Summary, UserName = UserNameValue, ShortName = ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.ClusteringOutput TrainKMeans(IHostEnvironment env, Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.KMeansClustering/doc.xml b/src/Microsoft.ML.KMeansClustering/doc.xml index affaeabf98..a1590595dc 100644 --- a/src/Microsoft.ML.KMeansClustering/doc.xml +++ b/src/Microsoft.ML.KMeansClustering/doc.xml @@ -1,5 +1,5 @@  - + @@ -13,10 +13,24 @@ YYK-Means observes that there is a lot of redundancy across iterations in the KMeans algorithms and most points do not change their clusters during an iteration. It uses various bounding techniques to identify this redundancy and eliminate many distance computations and optimize centroid computations. For more information on K-means, and K-means++ see: - K-means. - K-means++ + + K-means + K-means++ + + + + + new KMeansPlusPlusClusterer + { + MaxIterations = 100, + NumThreads = 5, + InitAlgorithm = KMeansPlusPlusTrainerInitAlgorithm.KMeansParallel + } + + + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs index c25b196b0e..7ce9f42b10 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs @@ -78,7 +78,7 @@ public static IPredictorProducing Create(IHostEnvironment env, ModelLoadC } } - /// + /// public sealed class LightGbmBinaryTrainer : LightGbmTrainerBase> { internal const string UserName = "LightGBM Binary Classifier"; @@ -133,7 +133,8 @@ public static partial class LightGbm Desc = LightGbmBinaryTrainer.Summary, UserName = LightGbmBinaryTrainer.UserName, ShortName = LightGbmBinaryTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, LightGbmArguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index 97649feb2a..0534f0d660 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -18,7 +18,7 @@ namespace Microsoft.ML.Runtime.LightGBM { - /// + /// public sealed class LightGbmMulticlassTrainer : LightGbmTrainerBase, OvaPredictor> { public const string Summary = "LightGBM Multi Class Classifier"; @@ -186,7 +186,8 @@ public static partial class LightGbm Desc = "Train a LightGBM multi class model.", UserName = LightGbmMulticlassTrainer.Summary, ShortName = LightGbmMulticlassTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.MulticlassClassificationOutput TrainMultiClass(IHostEnvironment env, LightGbmArguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index 2f44a0ba9d..3bafb09ab3 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -68,7 +68,7 @@ public static LightGbmRankingPredictor Create(IHostEnvironment env, ModelLoadCon } } - /// + /// public sealed class LightGbmRankingTrainer : LightGbmTrainerBase { public const string UserName = "LightGBM Ranking"; @@ -131,7 +131,8 @@ 
public static partial class LightGbm Desc = "Train a LightGBM ranking model.", UserName = LightGbmRankingTrainer.UserName, ShortName = LightGbmRankingTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.RankingOutput TrainRanking(IHostEnvironment env, LightGbmArguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs index db2f1f268a..f1b8850a72 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs @@ -20,7 +20,7 @@ namespace Microsoft.ML.Runtime.LightGBM { - /// + /// public sealed class LightGbmRegressionPredictor : FastTreePredictionWrapper { public const string LoaderSignature = "LightGBMRegressionExec"; @@ -123,7 +123,8 @@ public static partial class LightGbm Desc = LightGbmRegressorTrainer.Summary, UserName = LightGbmRegressorTrainer.UserNameValue, ShortName = LightGbmRegressorTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, LightGbmArguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.LightGBM/doc.xml b/src/Microsoft.ML.LightGBM/doc.xml index 4d53265ae3..1fcd38dd7a 100644 --- a/src/Microsoft.ML.LightGBM/doc.xml +++ b/src/Microsoft.ML.LightGBM/doc.xml @@ -1,5 +1,5 @@  - + @@ -11,6 +11,69 @@ GitHub: LightGBM + + + + new LightGbmBinaryClassifier + { + NumBoostRound = 200, + LearningRate = 0.5f, + NumLeaves = 32, + MinDataPerLeaf = 20 + } + + + + + + + new LightGbmClassifier + { + NumBoostRound = 200, + LearningRate = 0.5f, + NumLeaves = 32, + MinDataPerLeaf = 20 + } + + + + + + + new LightGbmRegressor + { + NumBoostRound = 100, + LearningRate = 0.5f, + NumLeaves = 32, + MinDataPerLeaf = 20, + Booster = new DartBoosterParameterFunction + { + XgboostDartMode = true, + UniformDrop = true + } + } + + + + + + + new LightGbmRanker + { + NumBoostRound = 100, + LearningRate = 0.5f, + NumLeaves = 32, + MinDataPerLeaf = 20, + Booster = new GbdtBoosterParameterFunction + { + MinSplitGain = 3, + MaxDepth = 200, + Subsample = 0.5 + } + } + + + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.PCA/PcaTrainer.cs b/src/Microsoft.ML.PCA/PcaTrainer.cs index bebaa49691..f90b405c4e 100644 --- a/src/Microsoft.ML.PCA/PcaTrainer.cs +++ b/src/Microsoft.ML.PCA/PcaTrainer.cs @@ -270,7 +270,8 @@ private static void PostProcess(VBuffer[] y, Float[] sigma, Float[] z, in Desc = "Train an PCA Anomaly model.", UserName = UserNameValue, ShortName = ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.AnomalyDetectionOutput TrainPcaAnomaly(IHostEnvironment env, Arguments input) { Contracts.CheckValue(env, nameof(env)); @@ -290,7 +291,7 @@ public static CommonOutputs.AnomalyDetectionOutput TrainPcaAnomaly(IHostEnvironm // - - If the error is close to 0, the instance is considered normal (non-anomaly). // REVIEW: move the predictor to a different file and fold EigenUtils.cs to this file. // REVIEW: Include the above detail in the XML documentation file. 
- /// + /// public sealed class PcaPredictor : PredictorBase, IValueMapper, ICanGetSummaryAsIDataView, diff --git a/src/Microsoft.ML.PCA/PcaTransform.cs b/src/Microsoft.ML.PCA/PcaTransform.cs index 0807abc5ed..6efbead226 100644 --- a/src/Microsoft.ML.PCA/PcaTransform.cs +++ b/src/Microsoft.ML.PCA/PcaTransform.cs @@ -26,7 +26,7 @@ namespace Microsoft.ML.Runtime.Data { - /// + /// public sealed class PcaTransform : OneToOneTransformBase { public sealed class Arguments : TransformInputBase @@ -541,7 +541,8 @@ private static void TransformFeatures(IExceptionContext ectx, ref VBuffer Desc = Summary, UserName = UserName, ShortName = ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "Pca", input); diff --git a/src/Microsoft.ML.PCA/doc.xml b/src/Microsoft.ML.PCA/doc.xml index 98d423b754..c4f0be7758 100644 --- a/src/Microsoft.ML.PCA/doc.xml +++ b/src/Microsoft.ML.PCA/doc.xml @@ -1,27 +1,50 @@  - + - PCA is a dimensionality-reduction transform which computes the projection of the feature vector to onto a low-rank subspace. + PCA is a dimensionality-reduction transform which computes the projection of the feature vector onto a low-rank subspace. - Principle Component Analysis (PCA) is a dimensionality-reduction transform which computes the projection of the feature vector to onto a low-rank subspace. + Principal Component Analysis (PCA) is a dimensionality-reduction algorithm which computes the projection of the feature vector onto a low-rank subspace. Its training is done using the technique described in the paper: Combining Structured and Unstructured Randomness in Large Scale PCA, - and the paper Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions - Randomized Methods for Computing the Singular Value Decomposition (SVD) of very large matrices - A randomized algorithm for principal component analysis - Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions + and the paper Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions + For more information, see also: + + + Randomized Methods for Computing the Singular Value Decomposition (SVD) of very large matrices + + + A randomized algorithm for principal component analysis + + + Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions + + + + An example of how to add the PcaCalculator transform to a pipeline with a column named "Features".
- + string[] features = new string[] { "Sepal length", "Sepal width", "Petal length", "Petal width" }; pipeline.Add(new PcaCalculator(features) { Rank = 3 }); - + + + + new PcaAnomalyDetector + { + Rank = 40, + Oversampling = 40, + NormalizeFeatures = Microsoft.ML.Models.NormalizeOption.Warn + } + + + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index 58c28b8712..62270763de 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs @@ -29,7 +29,7 @@ namespace Microsoft.ML.Runtime.FactorizationMachine [2] http://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf */ - /// + /// public sealed class FieldAwareFactorizationMachineTrainer : TrainerBase { public const string Summary = "Train a field-aware factorization machine for binary classification"; @@ -365,7 +365,8 @@ public override FieldAwareFactorizationMachinePredictor Train(TrainContext conte Desc = Summary, UserName = UserName, ShortName = ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/doc.xml b/src/Microsoft.ML.StandardLearners/FactorizationMachine/doc.xml index 2e72b2ea9f..f18bf60990 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/doc.xml +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/doc.xml @@ -1,5 +1,5 @@  - + @@ -9,34 +9,35 @@ Field Aware Factorization Machines use, in addition to the input variables, factorized parameters to model the interaction between pairs of variables. The algorithm is particularly useful for high dimensional datasets which can be very sparse (e.g. click-prediction for advertising systems). - An advantage of FFM over SVMs is that the training data does not need to be stored in memory, and the coefficients can be optimized directly. - For a general idea of what Field-aware Factorization Machines are see: Field Aware Factorization Machines + An advantage of FFM over SVMs is that the training data does not need to be stored in memory, and the coefficients can be optimized directly. + For a general idea of what Field-aware Factorization Machines are see: Field Aware Factorization Machines See references below for more details. This trainer is essentially faster than the one introduced in [2] because of some implementation tricks [3]. - - [1] Field-aware Factorization Machines for CTR Prediction + [1] Field-aware Factorization Machines for CTR Prediction - - [2] Adaptive Subgradient Methods for Online Learning and Stochastic Optimization - + [2] Adaptive Subgradient Methods for Online Learning and Stochastic Optimization - - [3] An Improved Stochastic Gradient Method for Training Large-scale Field-aware Factorization Machine. - + [3] An Improved Stochastic Gradient Method for Training Large-scale Field-aware Factorization Machine.
+ + - - pipeline.Add(new FieldAwareFactorizationMachineBinaryClassifier(){ LearningRate = 0.5f, Iter=2 }); + + pipeline.Add(new FieldAwareFactorizationMachineBinaryClassifier + { + LearningRate = 0.5f, + Iter=2 + }); - + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs index 5ae866c5d7..81b38e6976 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs @@ -1736,7 +1736,8 @@ public static partial class Sdca Desc = "Train an SDCA binary model.", UserName = LinearClassificationTrainer.UserNameValue, ShortName = LinearClassificationTrainer.LoadNameValue, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, LinearClassificationTrainer.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index f720b960a4..e896b69e1c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -30,8 +30,8 @@ namespace Microsoft.ML.Runtime.Learners { using Mkl = Microsoft.ML.Runtime.Learners.OlsLinearRegressionTrainer.Mkl; - /// - /// + /// + /// public sealed partial class LogisticRegression : LbfgsTrainerBase { public const string LoadNameValue = "LogisticRegression"; @@ -390,8 +390,8 @@ protected override ParameterMixingCalibratedPredictor CreatePredictor() Desc = Summary, UserName = UserNameValue, ShortName = ShortName, - XmlInclude = new[] { @"", - @""})] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input) { diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 5f7712843f..42cf9d690b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -36,8 +36,8 @@ namespace Microsoft.ML.Runtime.Learners { - /// - /// + /// + /// public sealed class MulticlassLogisticRegression : LbfgsTrainerBase, MulticlassLogisticRegressionPredictor> { public const string LoadNameValue = "MultiClassLogisticRegression"; @@ -964,8 +964,8 @@ public partial class LogisticRegression Desc = Summary, UserName = MulticlassLogisticRegression.UserNameValue, ShortName = MulticlassLogisticRegression.ShortName, - XmlInclude = new[] { @"", - @"" })] + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.MulticlassClassificationOutput TrainMultiClass(IHostEnvironment env, MulticlassLogisticRegression.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/doc.xml index 5ac68e2fc0..03b844a0ea 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/doc.xml +++ 
b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/doc.xml @@ -1,40 +1,41 @@  - + - Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. - The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. + Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as + a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistic function. - If the dependent variable has more than two possible values (blood type given diagnostic test results), then the logistic regression is multinomial. + If the dependent variable has more than two possible values (blood type given diagnostic test results), + then the logistic regression is multinomial. - The optimization technique used for LogisticRegression Classifier is the limited memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS). - Both the L-BFGS and regular BFGS algorithms use quasi-Newtonian methods to estimate the computationally intensive Hessian matrix in the equation used by Newton's method to calculate steps. + The optimization technique used for LogisticRegression Classifier is based on the limited memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS). + Both the L-BFGS and regular BFGS algorithms use quasi-Newtonian methods to estimate the computationally intensive + Hessian matrix in the equation used by Newton's method to calculate steps. But the L-BFGS approximation uses only a limited amount of memory to compute the next step direction, so that it is especially suited for problems with a large number of variables. - The MemorySize parameter specifies the number of past positions and gradients to store for use in the computation of the next step. + The MemorySize argument specifies the number of past positions and gradients to store for use in the + computation of the next step. - This learner can use elastic net regularization: a linear combination of L1 (lasso) and L2 (ridge) regularizations. - Regularization is a method that can render an ill-posed problem more tractable by imposing constraints that provide information to supplement the data and that prevents overfitting by penalizing models with extreme coefficient values. + This learner can use elastic net regularization: a linear combination of L1 (LASSO) and L2 (ridge) regularizations. + Regularization is a method that can render an ill-posed problem more tractable by imposing constraints that provide information + to supplement the data and that prevents overfitting by penalizing models with extreme coefficient values. This can improve the generalization of the model learned by selecting the optimal complexity in the bias-variance tradeoff. Regularization works by adding the penalty that is associated with coefficient values to the error of the hypothesis. - An accurate model with extreme coefficient values would be penalized more, but a less accurate model with more conservative values would be penalized less. L1 and L2 regularization have different effects and uses that are complementary in certain respects. + An accurate model with extreme coefficient values would be penalized more, but a less accurate model with more conservative + values would be penalized less. L1 and L2 regularization have different effects and uses that are complementary in certain respects.
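To make the combined penalty concrete, here is a small C# sketch of the textbook elastic-net term that such a learner adds to the training loss (illustrative only, not the trainer's internal code): for weights w, the penalty is L1Weight * sum(|w_i|) + L2Weight * sum(w_i^2).

using System;
using System.Linq;

static class ElasticNetSketch
{
    // Textbook elastic-net penalty added to the loss: l1 * sum(|w|) + l2 * sum(w^2).
    // Models with extreme coefficient values therefore incur a larger penalized error.
    public static double Penalty(double[] weights, double l1Weight, double l2Weight)
    {
        double l1 = weights.Sum(w => Math.Abs(w));
        double l2 = weights.Sum(w => w * w);
        return l1Weight * l1 + l2Weight * l2;
    }
}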
- - - L1Weight: can be applied to sparse models, when working with high-dimensional data. - It pulls small weights associated features that are relatively unimportant towards 0. - - - - - L2Weight: is preferable for data that is not sparse. It pulls large weights towards zero. - - + + L1Weight can be applied to sparse models, when working with high-dimensional data. It pulls small weights associated with features + that are relatively unimportant towards 0. + L1 regularization is an implementation of OWLQN, based on: + Scalable training of L1-regularized log-linear models + + L2Weight is preferable for data that is not sparse. It pulls large weights towards zero. Adding the ridge penalty to the regularization overcomes some of lasso's limitations. It can improve its predictive accuracy, for example, when the number of predictors is greater than the sample size. If x = l1_weight and y = l2_weight, ax + by = c defines the linear span of the regularization terms. The default values of x and y are both 1. @@ -50,18 +51,18 @@ - + pipeline.Add(new LogisticRegressionClassifier()); - + pipeline.Add(new LogisticRegressionBinaryClassifier()); - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs index 94bac1d9ec..bb6c6101ae 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -26,18 +26,13 @@ namespace Microsoft.ML.Runtime.Learners { + /// public sealed class MultiClassNaiveBayesTrainer : TrainerBase { public const string LoadName = "MultiClassNaiveBayes"; internal const string UserName = "Multiclass Naive Bayes"; internal const string ShortName = "MNB"; internal const string Summary = "Trains a multiclass Naive Bayes predictor that supports binary feature values."; - internal const string Remarks = @" -Naive Bayes is a probabilistic classifier that can be used for multiclass problems. -Using Bayes' theorem, the conditional probability for a sample belonging to a class can be calculated based on the sample count for each feature combination groups. -However, Naive Bayes Classifier is feasible only if the number of features and the values each feature can take is relatively small. -It also assumes that the features are strictly independent.
-"; public sealed class Arguments : LearnerInputBaseWithLabel { @@ -126,7 +121,10 @@ public override MultiClassNaiveBayesPredictor Train(TrainContext context) [TlcModule.EntryPoint(Name = "Trainers.NaiveBayesClassifier", Desc = "Train a MultiClassNaiveBayesTrainer.", - UserName = UserName, ShortName = ShortName)] + UserName = UserName, + ShortName = ShortName, + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.MulticlassClassificationOutput TrainMultiClassNaiveBayesTrainer(IHostEnvironment env, Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs index 62e3a79631..ed1bc17aad 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs @@ -36,6 +36,7 @@ namespace Microsoft.ML.Runtime.Learners using TScalarPredictor = IPredictorProducing; using TScalarTrainer = ITrainer>; + /// public sealed class Ova : MetaMulticlassTrainer { internal const string LoadNameValue = "OVA"; diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs index 1c4700ccdd..073488f75c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs @@ -31,6 +31,29 @@ namespace Microsoft.ML.Runtime.Learners using TDistPredictor = IDistPredictorProducing; using CR = RoleMappedSchema.ColumnRole; + /// + /// In this strategy, a binary classification algorithm is trained on each pair of classes. + /// The pairs are unordered but created with replacement: so, if there were three classes, 0, 1, + /// 2, we would train classifiers for the pairs (0,0), (0,1), (0,2), (1,1), (1,2), + /// and(2,2). For each binary classifier, an input data point is considered a + /// positive example if it is in either of the two classes in the pair, and a + /// negative example otherwise. At prediction time, the probabilities for each + /// pair of classes is considered as the probability of being in either class of + /// the pair given the data, and the final predictive probabilities out of that + /// per class are calculated given the probability that an example is in any given + /// pair. + /// + /// These two can allow you to exploit trainers that do not naturally have a + /// multiclass option, e.g., using the Runtime.FastTree.FastTreeBinaryClassificationTrainer + /// to solve a multiclass problem. + /// Alternately, it can allow ML.NET to solve a "simpler" problem even in the cases + /// where the trainer has a multiclass option, but using it directly is not + /// practical due to, usually, memory constraints.For example, while a multiclass + /// logistic regression is a more principled way to solve a multiclass problem, it + /// requires that the learner store a lot more intermediate state in the form of + /// L-BFGS history for all classes *simultaneously*, rather than just one-by-one + /// as would be needed for OVA. 
+ /// public sealed class Pkpd : MetaMulticlassTrainer { internal const string LoadNameValue = "PKPD"; diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml new file mode 100644 index 0000000000..8d2af374db --- /dev/null +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml @@ -0,0 +1,62 @@ + + + + + + + Trains a multiclass Naive Bayes predictor that supports binary feature values. + + + Naive Bayes is a probabilistic classifier that can be used for multiclass problems. + Using Bayes' theorem, the conditional probability for a sample belonging to a class can be calculated based on the sample count for each feature combination group. + However, Naive Bayes Classifier is feasible only if the number of features and the values each feature can take is relatively small. + It assumes independence among the presence of features in a class even though they may be dependent on each other. + This multi-class trainer accepts binary feature values of type float, i.e., feature values are either true or false. + Specifically, a feature value greater than zero is treated as true. + + + + + + + + + + pipeline.Add(new NaiveBayesClassifier + { + NormalizeFeatures = NormalizeOption.Auto, + Caching = CachingOptions.Memory + }); + + + + + + + Trains a one-versus-all multi-class classifier on top of the specified binary classifier. + + + In this strategy, a binary classification algorithm is used to train one classifier for each class, which distinguishes that class from all other classes. + Prediction is then performed by running these binary classifiers, and choosing the prediction with the highest confidence score. + This algorithm can be used with any of the binary classifiers in ML.NET. + A few binary classifiers already have implementations for multi-class problems, + thus users can choose either one depending on the context. + The OVA version of a binary classifier, such as wrapping a LightGbmBinaryClassifier, + can be different from LightGbmClassifier, which develops a multi-class classifier directly. + Note that even if the classifier indicates that it does not need caching, OneVersusAll will always + request caching, as it will be performing multiple passes over the data set. + These learners will request normalization from the data pipeline if the classifier indicates it would benefit from it. + + + + + + + + pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier())); + + + + + + \ No newline at end of file diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index a2e09b6905..c7c2d1d627 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -28,7 +28,7 @@ namespace Microsoft.ML.Runtime.Learners // - Loss function. By default, hinge loss (aka max-margin avgd perceptron) // - Feature normalization. By default, rescaling between min and max values for every feature // - Prediction calibration to produce probabilities. Off by default, if on, uses exponential (aka Platt) calibration.
- /// + /// public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer { @@ -91,7 +91,8 @@ protected override LinearBinaryPredictor CreatePredictor() Desc = Summary, UserName = UserNameValue, ShortName = ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs index 5cbdc01478..78dc5ea3b2 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs @@ -27,7 +27,7 @@ namespace Microsoft.ML.Runtime.Learners { using TPredictor = LinearRegressionPredictor; - /// + /// public sealed class OnlineGradientDescentTrainer : AveragedLinearTrainer { internal const string LoadNameValue = "OnlineGradientDescent"; @@ -89,7 +89,8 @@ protected override TPredictor CreatePredictor() Desc = "Train an Online gradient descent perceptron.", UserName = UserNameValue, ShortName = ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml index 1ab7647c4f..0ace721221 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml @@ -1,11 +1,11 @@  - + Stochastic gradient descent is an optimization method used to train a wide range of models in machine learning. - In the ML.Net the implementation of OGD, it is for linear regression. + In ML.NET, the implementation of OGD is for linear regression. Stochastic gradient descent uses a simple yet efficient iterative technique to fit model coefficients using error gradients for convex loss functions. @@ -13,6 +13,18 @@ and an option to update the weight vector using the average of the vectors seen over time (averaged argument is set to True by default).
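The averaged update mentioned above can be sketched in a few lines (a simplified illustration of averaged online gradient descent for squared loss, assuming a plain linear model; this is not the trainer's implementation):

using System;

static class AveragedOgdSketch
{
    // One pass of online gradient descent for linear regression with squared loss:
    // w <- w - rate * (w.x - y) * x, while maintaining a running average of the
    // weight vectors seen over time; the averaged weights are used for prediction.
    public static double[] Train(double[][] xs, double[] ys, double rate)
    {
        int d = xs[0].Length;
        var w = new double[d];
        var avg = new double[d];
        for (int t = 0; t < xs.Length; t++)
        {
            double prediction = 0;
            for (int i = 0; i < d; i++)
                prediction += w[i] * xs[t][i];
            double scale = rate * (prediction - ys[t]);
            for (int i = 0; i < d; i++)
            {
                w[i] -= scale * xs[t][i];
                avg[i] += (w[i] - avg[i]) / (t + 1); // incremental running mean
            }
        }
        return avg;
    }
}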
+ + + + new OnlineGradientDescentRegressor + { + NumIterations = 10, + L2RegularizerWeight = 0.6f, + LossFunction = new PoissonLossRegressionLossFunction() + } + + + @@ -39,6 +51,18 @@ Large Margin Classification Using the Perceptron Algorithm + + + + new AveragedPerceptronBinaryClassifier + { + NumIterations = 10, + L2RegularizerWeight = 0.01f, + LossFunction = new ExpLossClassificationLossFunction() + } + + + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index 94dbb42325..d8bcd68c9f 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -26,7 +26,7 @@ namespace Microsoft.ML.Runtime.Learners { - /// + /// public sealed class PoissonRegression : LbfgsTrainerBase { internal const string LoadNameValue = "PoissonRegression"; @@ -127,7 +127,8 @@ protected override void ProcessPriorDistribution(Float label, Float weight) Desc = "Train an Poisson regression model.", UserName = UserNameValue, ShortName = ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/doc.xml index 4d2aeec579..ec14c9446b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/doc.xml +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/doc.xml @@ -1,5 +1,5 @@  - + @@ -12,6 +12,17 @@ Assuming that the dependent variable follows a Poisson distribution, the parameters of the regressor can be estimated by maximizing the likelihood of the obtained observations. + + + + new PoissonRegressor + { + MaxIterations = 100, + L2Weight = 0.6f + } + + + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index facceffd70..0354668b31 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -29,7 +29,7 @@ namespace Microsoft.ML.Runtime.Learners using TVectorPredictor = IPredictorProducing>; // SDCA linear multiclass trainer. 
- /// + /// public class SdcaMultiClassTrainer : SdcaTrainerBase { public const string LoadNameValue = "SDCAMC"; @@ -375,7 +375,8 @@ public static partial class Sdca Desc = SdcaMultiClassTrainer.Summary, UserName = SdcaMultiClassTrainer.UserNameValue, ShortName = SdcaMultiClassTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.MulticlassClassificationOutput TrainMultiClass(IHostEnvironment env, SdcaMultiClassTrainer.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs index 466625a679..bfa0796bed 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs @@ -25,7 +25,7 @@ namespace Microsoft.ML.Runtime.Learners { using TScalarPredictor = IPredictorWithFeatureWeights; - /// + /// public sealed class SdcaRegressionTrainer : SdcaTrainerBase { public const string LoadNameValue = "SDCAR"; @@ -127,7 +127,8 @@ public static partial class Sdca Desc = SdcaRegressionTrainer.Summary, UserName = SdcaRegressionTrainer.UserNameValue, ShortName = SdcaRegressionTrainer.ShortName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, SdcaRegressionTrainer.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/doc.xml index 0b4336a96e..a704827b88 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/doc.xml +++ b/src/Microsoft.ML.StandardLearners/Standard/doc.xml @@ -1,5 +1,5 @@  - + @@ -7,7 +7,7 @@ Train an SDCA linear model. - This classifier is a trainer based on the Stochastic DualCoordinate Ascent(SDCA) method, a state-of-the-art optimization technique for convex objective functions. + This classifier is a trainer based on the Stochastic Dual Coordinate Ascent(SDCA) method, a state-of-the-art optimization technique for convex objective functions. The algorithm can be scaled for use on large out-of-memory data sets due to a semi-asynchronized implementation that supports multi-threading. Convergence is underwritten by periodically enforcing synchronization between primal and dual updates in a separate thread. @@ -21,10 +21,53 @@ Elastic net regularization can be specified by the 'L2Const' and 'L1Threshold' parameters. Note that the 'L2Const' has an effect on the rate of convergence. In general, the larger the 'L2Const', the faster SDCA converges. - Scaling Up Stochastic Dual Coordinate Ascent. - Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization. - + For more information, see also: + + + Scaling Up Stochastic Dual Coordinate Ascent. + + + Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization. 
+ + + + + + + new StochasticDualCoordinateAscentBinaryClassifier + { + MaxIterations = 100, + NumThreads = 7, + LossFunction = new SmoothedHingeLossSDCAClassificationLossFunction(), + Caching = Microsoft.ML.Models.CachingOptions.Disk + } + + + + + + + new StochasticDualCoordinateAscentClassifier + { + MaxIterations = 100, + NumThreads = 7, + LossFunction = new SmoothedHingeLossSDCAClassificationLossFunction() + } + + + + + + + new StochasticDualCoordinateAscentRegressor + { + MaxIterations = 100, + NumThreads = 5 + } + + + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs b/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs index f400d92a93..42d1f4d310 100644 --- a/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs +++ b/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs @@ -19,7 +19,7 @@ namespace Microsoft.ML.Runtime.Data { - /// + /// public static class CategoricalHashTransform { public const int NumBitsLim = 31; // can't convert 31-bit hashes to indicator vectors, so max is 30 diff --git a/src/Microsoft.ML.Transforms/CategoricalTransform.cs b/src/Microsoft.ML.Transforms/CategoricalTransform.cs index fc1382901b..420db1b731 100644 --- a/src/Microsoft.ML.Transforms/CategoricalTransform.cs +++ b/src/Microsoft.ML.Transforms/CategoricalTransform.cs @@ -21,7 +21,7 @@ [assembly: LoadableClass(typeof(void), typeof(Categorical), null, typeof(SignatureEntryPointModule), "Categorical")] namespace Microsoft.ML.Runtime.Data { - /// + /// public static class CategoricalTransform { public enum OutputKind : byte @@ -246,7 +246,8 @@ public static class Categorical [TlcModule.EntryPoint(Name = "Transforms.CategoricalOneHotVectorizer", Desc = CategoricalTransform.Summary, UserName = CategoricalTransform.UserName, - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput CatTransformDict(IHostEnvironment env, CategoricalTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); @@ -261,7 +262,8 @@ public static CommonOutputs.TransformOutput CatTransformDict(IHostEnvironment en [TlcModule.EntryPoint(Name = "Transforms.CategoricalHashOneHotVectorizer", Desc = CategoricalHashTransform.Summary, UserName = CategoricalHashTransform.UserName , - XmlInclude = new[] { @"" })] + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput CatTransformHash(IHostEnvironment env, CategoricalHashTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); @@ -273,7 +275,11 @@ public static CommonOutputs.TransformOutput CatTransformHash(IHostEnvironment en return new CommonOutputs.TransformOutput { Model = new TransformModel(env, xf, input.Data), OutputData = xf }; } - [TlcModule.EntryPoint(Name = "Transforms.TextToKeyConverter", Desc = TermTransform.Summary, UserName = TermTransform.UserName)] + [TlcModule.EntryPoint(Name = "Transforms.TextToKeyConverter", + Desc = TermTransform.Summary, + UserName = TermTransform.UserName, + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.TransformOutput TextToKey(IHostEnvironment env, TermTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); @@ -285,7 +291,10 @@ public static CommonOutputs.TransformOutput TextToKey(IHostEnvironment env, Term return new CommonOutputs.TransformOutput { Model = new TransformModel(env, xf, input.Data), OutputData = xf }; } - [TlcModule.EntryPoint(Name = "Transforms.KeyToTextConverter", Desc = "KeyToValueTransform utilizes KeyValues metadata to 
map key indices to the corresponding values in the KeyValues metadata.", UserName = KeyToValueTransform.UserName)] + [TlcModule.EntryPoint(Name = "Transforms.KeyToTextConverter", + Desc = "KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata.", + UserName = KeyToValueTransform.UserName, + XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput KeyToText(IHostEnvironment env, KeyToValueTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs index 79adda882e..c01508fc30 100644 --- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs @@ -18,11 +18,7 @@ namespace Microsoft.ML.Runtime.Data { - /// - /// Selects the slots for which the count of non-default values is greater than a threshold. - /// Uses a set of aggregators to count the number of non-default values for each slot and - /// instantiates a DropSlots transform to actually drop the slots. - /// + /// public static class CountFeatureSelectionTransform { public const string Summary = "Selects the slots for which the count of non-default values is greater than or equal to a threshold."; diff --git a/src/Microsoft.ML.Transforms/EntryPoints/SelectFeatures.cs b/src/Microsoft.ML.Transforms/EntryPoints/SelectFeatures.cs index 583b7e00ad..2d7246763f 100644 --- a/src/Microsoft.ML.Transforms/EntryPoints/SelectFeatures.cs +++ b/src/Microsoft.ML.Transforms/EntryPoints/SelectFeatures.cs @@ -11,7 +11,11 @@ namespace Microsoft.ML.Runtime.EntryPoints { public static class SelectFeatures { - [TlcModule.EntryPoint(Name = "Transforms.FeatureSelectorByCount", Desc = CountFeatureSelectionTransform.Summary, UserName = CountFeatureSelectionTransform.UserName)] + [TlcModule.EntryPoint(Name = "Transforms.FeatureSelectorByCount", + Desc = CountFeatureSelectionTransform.Summary, + UserName = CountFeatureSelectionTransform.UserName, + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput CountSelect(IHostEnvironment env, CountFeatureSelectionTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); @@ -23,7 +27,12 @@ public static CommonOutputs.TransformOutput CountSelect(IHostEnvironment env, Co return new CommonOutputs.TransformOutput { Model = new TransformModel(env, xf, input.Data), OutputData = xf }; } - [TlcModule.EntryPoint(Name = "Transforms.FeatureSelectorByMutualInformation", Desc = MutualInformationFeatureSelectionTransform.Summary, UserName = MutualInformationFeatureSelectionTransform.UserName, ShortName = MutualInformationFeatureSelectionTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.FeatureSelectorByMutualInformation", + Desc = MutualInformationFeatureSelectionTransform.Summary, + UserName = MutualInformationFeatureSelectionTransform.UserName, + ShortName = MutualInformationFeatureSelectionTransform.ShortName, + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput MutualInformationSelect(IHostEnvironment env, MutualInformationFeatureSelectionTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs b/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs index d6de490d52..b37ecd5f4f 100644 --- a/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs +++ b/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs 
@@ -17,11 +17,16 @@ namespace Microsoft.ML.Runtime.Transforms /// public static class TextAnalytics { - [TlcModule.EntryPoint(Name = "Transforms.TextFeaturizer", Desc = Data.TextTransform.Summary, UserName = Data.TextTransform.UserName, ShortName = Data.TextTransform.LoaderSignature)] + [TlcModule.EntryPoint(Name = "Transforms.TextFeaturizer", + Desc = Data.TextTransform.Summary, + UserName = Data.TextTransform.UserName, + ShortName = Data.TextTransform.LoaderSignature, + XmlInclude = new[] { @"" , + @""})] public static CommonOutputs.TransformOutput TextTransform(IHostEnvironment env, TextTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "TextTransform", input); - var xf = Microsoft.ML.Runtime.Data.TextTransform.Create(h, input, input.Data); + var xf = Data.TextTransform.Create(h, input, input.Data); return new CommonOutputs.TransformOutput() { Model = new TransformModel(h, xf, input.Data), @@ -29,8 +34,12 @@ public static CommonOutputs.TransformOutput TextTransform(IHostEnvironment env, }; } - [TlcModule.EntryPoint(Name = "Transforms.WordTokenizer", Desc = Data.DelimitedTokenizeTransform.Summary, - UserName = Data.DelimitedTokenizeTransform.UserName, ShortName = Data.DelimitedTokenizeTransform.LoaderSignature)] + [TlcModule.EntryPoint(Name = "Transforms.WordTokenizer", + Desc = Data.DelimitedTokenizeTransform.Summary, + UserName = Data.DelimitedTokenizeTransform.UserName, + ShortName = Data.DelimitedTokenizeTransform.LoaderSignature, + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput DelimitedTokenizeTransform(IHostEnvironment env, DelimitedTokenizeTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "DelimitedTokenizeTransform", input); @@ -42,7 +51,11 @@ public static CommonOutputs.TransformOutput DelimitedTokenizeTransform(IHostEnvi }; } - [TlcModule.EntryPoint(Name = "Transforms.NGramTranslator", Desc = Data.NgramTransform.Summary, UserName = Data.NgramTransform.UserName, ShortName = Data.NgramTransform.LoaderSignature)] + [TlcModule.EntryPoint(Name = "Transforms.NGramTranslator", + Desc = NgramTransform.Summary, + UserName = NgramTransform.UserName, + ShortName = NgramTransform.LoaderSignature, + XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput NGramTransform(IHostEnvironment env, NgramTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "NGramTransform", input); @@ -54,7 +67,10 @@ public static CommonOutputs.TransformOutput NGramTransform(IHostEnvironment env, }; } - [TlcModule.EntryPoint(Name = "Transforms.Dictionarizer", Desc = Data.TermTransform.Summary, UserName = Data.TermTransform.UserName, ShortName = Data.TermTransform.LoaderSignature)] + [TlcModule.EntryPoint(Name = "Transforms.Dictionarizer", + Desc = Data.TermTransform.Summary, + UserName = Data.TermTransform.UserName, + ShortName = Data.TermTransform.LoaderSignature)] public static CommonOutputs.TransformOutput TermTransform(IHostEnvironment env, TermTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "TermTransform", input); @@ -66,7 +82,12 @@ public static CommonOutputs.TransformOutput TermTransform(IHostEnvironment env, }; } - [TlcModule.EntryPoint(Name = "Transforms.SentimentAnalyzer", Desc = "Uses a pretrained sentiment model to score input strings", UserName = SentimentAnalyzingTransform.UserName, ShortName = SentimentAnalyzingTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.SentimentAnalyzer", + Desc = "Uses a pretrained sentiment 
model to score input strings", + UserName = SentimentAnalyzingTransform.UserName, + ShortName = SentimentAnalyzingTransform.ShortName, + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput AnalyzeSentiment(IHostEnvironment env, SentimentAnalyzingTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "SentimentAnalyzer", input); @@ -78,7 +99,11 @@ public static CommonOutputs.TransformOutput AnalyzeSentiment(IHostEnvironment en }; } - [TlcModule.EntryPoint(Name = "Transforms.CharacterTokenizer", Desc = CharTokenizeTransform.Summary, UserName = CharTokenizeTransform.UserName, ShortName = CharTokenizeTransform.LoaderSignature)] + [TlcModule.EntryPoint(Name = "Transforms.CharacterTokenizer", + Desc = CharTokenizeTransform.Summary, + UserName = CharTokenizeTransform.UserName, + ShortName = CharTokenizeTransform.LoaderSignature, + XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput CharTokenize(IHostEnvironment env, CharTokenizeTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); @@ -93,7 +118,12 @@ public static CommonOutputs.TransformOutput CharTokenize(IHostEnvironment env, C }; } - [TlcModule.EntryPoint(Name = "Transforms.LightLda", Desc = LdaTransform.Summary, UserName = LdaTransform.UserName, ShortName = LdaTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.LightLda", + Desc = LdaTransform.Summary, + UserName = LdaTransform.UserName, + ShortName = LdaTransform.ShortName, + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.TransformOutput LightLda(IHostEnvironment env, LdaTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index a7a69ff1f4..fd67e5fca6 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -38,7 +38,7 @@ namespace Microsoft.ML.Runtime.Data /// Performs the following operation on a vector X: /// Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. /// Usage examples and Matlab code: - /// + /// http://www.cs.stanford.edu/~acoates/papers/coatesleeng_aistats_2011.pdf. 
/// public sealed class LpNormNormalizerTransform : OneToOneTransformBase { @@ -666,7 +666,11 @@ private static Float Mean(Float[] src, int count, int length) public static class LpNormalization { - [TlcModule.EntryPoint(Name = "Transforms.LpNormalizer", Desc = LpNormNormalizerTransform.Summary, UserName = LpNormNormalizerTransform.UserNameLP, ShortName = LpNormNormalizerTransform.ShortNameLP)] + [TlcModule.EntryPoint(Name = "Transforms.LpNormalizer", + Desc = LpNormNormalizerTransform.Summary, + UserName = LpNormNormalizerTransform.UserNameLP, + ShortName = LpNormNormalizerTransform.ShortNameLP, + XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNormNormalizerTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "LpNormalize", input); @@ -678,7 +682,11 @@ public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNo }; } - [TlcModule.EntryPoint(Name = "Transforms.GlobalContrastNormalizer", Desc = LpNormNormalizerTransform.GcnSummary, UserName = LpNormNormalizerTransform.UserNameGn, ShortName = LpNormNormalizerTransform.ShortNameGn)] + [TlcModule.EntryPoint(Name = "Transforms.GlobalContrastNormalizer", + Desc = LpNormNormalizerTransform.GcnSummary, + UserName = LpNormNormalizerTransform.UserNameGn, + ShortName = LpNormNormalizerTransform.ShortNameGn, + XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput GcNormalize(IHostEnvironment env, LpNormNormalizerTransform.GcnArguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "GcNormalize", input); diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs index 2170c3b373..49c989adbf 100644 --- a/src/Microsoft.ML.Transforms/GroupTransform.cs +++ b/src/Microsoft.ML.Transforms/GroupTransform.cs @@ -664,7 +664,11 @@ public ValueGetter GetGetter(int col) public static partial class GroupingOperations { - [TlcModule.EntryPoint(Name = "Transforms.CombinerByContiguousGroupId", Desc = GroupTransform.Summary, UserName = GroupTransform.UserName, ShortName = GroupTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.CombinerByContiguousGroupId", + Desc = GroupTransform.Summary, + UserName = GroupTransform.UserName, + ShortName = GroupTransform.ShortName, + XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput Group(IHostEnvironment env, GroupTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/HashJoinTransform.cs b/src/Microsoft.ML.Transforms/HashJoinTransform.cs index f7c2259a3a..31f3123df5 100644 --- a/src/Microsoft.ML.Transforms/HashJoinTransform.cs +++ b/src/Microsoft.ML.Transforms/HashJoinTransform.cs @@ -193,6 +193,7 @@ public HashJoinTransform(IHostEnvironment env, { } + /// public HashJoinTransform(IHostEnvironment env, Arguments args, IDataView input) : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestColumnType) { @@ -701,7 +702,12 @@ protected override ColumnType GetColumnTypeCore(int iinfo) public static class HashJoin { - [TlcModule.EntryPoint(Name = "Transforms.HashConverter", Desc = HashJoinTransform.Summary, UserName = HashJoinTransform.UserName, ShortName = HashJoinTransform.RegistrationName)] + [TlcModule.EntryPoint(Name = "Transforms.HashConverter", + Desc = HashJoinTransform.Summary, + UserName = HashJoinTransform.UserName, + ShortName = HashJoinTransform.RegistrationName, + XmlInclude = new[] { @"", + @""})] public static 
CommonOutputs.TransformOutput Apply(IHostEnvironment env, HashJoinTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 39fec2b208..c6c5cd23e7 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -21,10 +21,7 @@ namespace Microsoft.ML.Runtime.Data { - /// - /// Selects the top k slots ordered by their mutual information with the label column. - /// Instantiates a DropSlots transform to actually drop the slots. - /// + /// public static class MutualInformationFeatureSelectionTransform { public const string Summary = diff --git a/src/Microsoft.ML.Transforms/NADropTransform.cs b/src/Microsoft.ML.Transforms/NADropTransform.cs index ded5add732..80e88f3ae3 100644 --- a/src/Microsoft.ML.Transforms/NADropTransform.cs +++ b/src/Microsoft.ML.Transforms/NADropTransform.cs @@ -21,9 +21,7 @@ namespace Microsoft.ML.Runtime.Data { - /// - /// Transform to drop NAs from vector columns. - /// + /// public sealed class NADropTransform : OneToOneTransformBase { public sealed class Arguments : TransformInputBase diff --git a/src/Microsoft.ML.Transforms/NAHandleTransform.cs b/src/Microsoft.ML.Transforms/NAHandleTransform.cs index 840a080ae6..746229d4f7 100644 --- a/src/Microsoft.ML.Transforms/NAHandleTransform.cs +++ b/src/Microsoft.ML.Transforms/NAHandleTransform.cs @@ -17,21 +17,7 @@ namespace Microsoft.ML.Runtime.Data { - /// - /// This transform handles missing values in the input columns. For each input column, it creates an output column - /// where the missing values are replaced by one of these specified values: - /// - The default value of the appropriate type. - /// - The mean value of the appropriate type. - /// - The max value of the appropriate type. - /// - The min value of the appropriate type. - /// (The last three work only for numeric/time span/ DateTime columns). - /// The output column can also optionally include an indicator vector for which slots were missing in the input column - /// (this can be done only when the indicator vector type can be converted to the input column type, i.e. only for numeric columns). - /// - /// When computing the mean/max/min value, there is also an option to compute it over the whole column instead of per slot. This option - /// has a default value of true for variable length vectors, and false for known length vectors. It can be changed to true for known - /// length vectors, but it results in an error if changed to false for variable length vectors. 
- /// + /// public static class NAHandleTransform { public enum ReplacementKind diff --git a/src/Microsoft.ML.Transforms/NAHandling.cs b/src/Microsoft.ML.Transforms/NAHandling.cs index 2ed6830782..7190291b16 100644 --- a/src/Microsoft.ML.Transforms/NAHandling.cs +++ b/src/Microsoft.ML.Transforms/NAHandling.cs @@ -11,7 +11,12 @@ namespace Microsoft.ML.Runtime.Data { public static class NAHandling { - [TlcModule.EntryPoint(Name = "Transforms.MissingValuesDropper", Desc = NADropTransform.Summary, UserName = NADropTransform.FriendlyName, ShortName = NADropTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.MissingValuesDropper", + Desc = NADropTransform.Summary, + UserName = NADropTransform.FriendlyName, + ShortName = NADropTransform.ShortName, + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.TransformOutput Drop(IHostEnvironment env, NADropTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, NADropTransform.ShortName, input); @@ -23,7 +28,12 @@ public static CommonOutputs.TransformOutput Drop(IHostEnvironment env, NADropTra }; } - [TlcModule.EntryPoint(Name = "Transforms.MissingValuesRowDropper", Desc = NAFilter.Summary, UserName = NAFilter.FriendlyName, ShortName = NAFilter.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.MissingValuesRowDropper", + Desc = NAFilter.Summary, + UserName = NAFilter.FriendlyName, + ShortName = NAFilter.ShortName, + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput Filter(IHostEnvironment env, NAFilter.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, NAFilter.ShortName, input); @@ -35,7 +45,12 @@ public static CommonOutputs.TransformOutput Filter(IHostEnvironment env, NAFilte }; } - [TlcModule.EntryPoint(Name = "Transforms.MissingValueHandler", Desc = NAHandleTransform.Summary, UserName = NAHandleTransform.FriendlyName, ShortName = NAHandleTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.MissingValueHandler", + Desc = NAHandleTransform.Summary, + UserName = NAHandleTransform.FriendlyName, + ShortName = NAHandleTransform.ShortName, + XmlInclude = new[] { @"", + @"" })] public static CommonOutputs.TransformOutput Handle(IHostEnvironment env, NAHandleTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "NAHandle", input); @@ -47,7 +62,12 @@ public static CommonOutputs.TransformOutput Handle(IHostEnvironment env, NAHandl }; } - [TlcModule.EntryPoint(Name = "Transforms.MissingValueIndicator", Desc = NAIndicatorTransform.Summary, UserName = NAIndicatorTransform.FriendlyName, ShortName = NAIndicatorTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.MissingValueIndicator", + Desc = NAIndicatorTransform.Summary, + UserName = NAIndicatorTransform.FriendlyName, + ShortName = NAIndicatorTransform.ShortName, + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput Indicator(IHostEnvironment env, NAIndicatorTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "NAIndicator", input); @@ -59,7 +79,12 @@ public static CommonOutputs.TransformOutput Indicator(IHostEnvironment env, NAIn }; } - [TlcModule.EntryPoint(Name = "Transforms.MissingValueSubstitutor", Desc = NAReplaceTransform.Summary, UserName = NAReplaceTransform.FriendlyName, ShortName = NAReplaceTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.MissingValueSubstitutor", + Desc = NAReplaceTransform.Summary, + UserName = NAReplaceTransform.FriendlyName, + ShortName = 
NAReplaceTransform.ShortName, + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput Replace(IHostEnvironment env, NAReplaceTransform.Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "NAReplace", input); diff --git a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs index c35a90d748..7607e19c61 100644 --- a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs @@ -21,10 +21,7 @@ namespace Microsoft.ML.Runtime.Data { - /// - /// This transform can transform either scalars or vectors (both fixed and variable size), - /// creating output columns that indicate corresponding NA values. - /// + /// public sealed class NAIndicatorTransform : OneToOneTransformBase { public sealed class Column : OneToOneColumn diff --git a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs index 30384780e9..cfa96ea9de 100644 --- a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs +++ b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs @@ -27,13 +27,12 @@ namespace Microsoft.ML.Runtime.Data { - /// - /// This transform can transform either scalars or vectors (both fixed and variable size), - /// creating output columns that are identical to the input columns except for replacing NA values - /// with either the default value, user input, or imputed values (min/max/mean are currently supported). - /// Imputation modes are supported for vectors both by slot and across all slots. - /// - /// REVIEW: May make sense to implement the transform template interface. + // This transform can transform either scalars or vectors (both fixed and variable size), + // creating output columns that are identical to the input columns except for replacing NA values + // with either the default value, user input, or imputed values (min/max/mean are currently supported). + // Imputation modes are supported for vectors both by slot and across all slots. + // REVIEW: May make sense to implement the transform template interface. + /// public sealed partial class NAReplaceTransform : OneToOneTransformBase { public enum ReplacementKind diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs index 9e1bad374e..b118266b7b 100644 --- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs +++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs @@ -26,14 +26,8 @@ namespace Microsoft.ML.Runtime.DataPipe { - /// - /// This transform is used to mark some of the columns (e.g. Label) optional during training so that the columns are not required during scoring. - /// When applied to new data, if any of the optional columns is not present a dummy columns is created having the same properties (e.g. 'name', 'type' etc.) as used during training. - /// The columns are filled with default values. The value is - /// - scalar for scalar column - /// - totally sparse vector for vector column. 
- /// - public sealed class OptionalColumnTransform : RowToRowMapperTransformBase + /// + public class OptionalColumnTransform : RowToRowMapperTransformBase { public sealed class Arguments : TransformInputBase { @@ -477,7 +471,13 @@ private Delegate MakeGetterVec(int length) } } - [TlcModule.EntryPoint(Desc = Summary, Name = "Transforms.OptionalColumnCreator", UserName = UserName, ShortName = ShortName)] + [TlcModule.EntryPoint(Desc = Summary, + Name = "Transforms.OptionalColumnCreator", + UserName = UserName, + ShortName = ShortName, + XmlInclude = new[] { @"", + @""})] + public static CommonOutputs.TransformOutput MakeOptional(IHostEnvironment env, Arguments input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "OptionalColumn", input); diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 588e0a86f1..7a9b214063 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -26,23 +26,22 @@ namespace Microsoft.ML.Runtime.TextAnalytics { - /// - /// LightLDA transform: Big Topic Models on Modest Compute Clusters. - /// LightLDA is an implementation of Latent Dirichlet Allocation (LDA). - /// Previous implementations of LDA such as SparseLDA or AliasLDA allow to achieve massive data and model scales, - /// for example models with tens of billions of parameters to be inferred from billions of documents. - /// However this requires using a cluster of thousands of machines with all ensuing costs to setup and maintain. - /// LightLDA solves this problem in a more cost-effective manner by providing an implementation - /// that is efficient enough for modest clusters with at most tens of machines... - /// For more details please see original LightLDA paper: - /// http://arxiv.org/abs/1412.1576 - /// http://www.www2015.it/documents/proceedings/proceedings/p1351.pdf - /// and open source implementation: - /// https://github.com/Microsoft/LightLDA - /// - /// See - /// for an example on how to use LdaTransform. - /// + // LightLDA transform: Big Topic Models on Modest Compute Clusters. + // LightLDA is an implementation of Latent Dirichlet Allocation (LDA). + // Previous implementations of LDA such as SparseLDA or AliasLDA allow to achieve massive data and model scales, + // for example models with tens of billions of parameters to be inferred from billions of documents. + // However this requires using a cluster of thousands of machines with all ensuing costs to setup and maintain. + // LightLDA solves this problem in a more cost-effective manner by providing an implementation + // that is efficient enough for modest clusters with at most tens of machines... + // For more details please see original LightLDA paper: + // http://arxiv.org/abs/1412.1576 + // http://www.www2015.it/documents/proceedings/proceedings/p1351.pdf + // and open source implementation: + // https://github.com/Microsoft/LightLDA + // + // See + // for an example on how to use LdaTransform. 
+ /// public sealed class LdaTransform : OneToOneTransformBase { public sealed class Arguments : TransformInputBase diff --git a/src/Microsoft.ML.Transforms/Text/SentimentAnalyzerTransform.cs b/src/Microsoft.ML.Transforms/Text/SentimentAnalyzerTransform.cs index 7ff3d84d10..f3471e09e0 100644 --- a/src/Microsoft.ML.Transforms/Text/SentimentAnalyzerTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/SentimentAnalyzerTransform.cs @@ -17,6 +17,7 @@ namespace Microsoft.ML.Runtime.TextAnalytics { + /// public static class SentimentAnalyzingTransform { public sealed class Arguments : TransformInputBase diff --git a/src/Microsoft.ML.Transforms/Text/TextTransform.cs b/src/Microsoft.ML.Transforms/Text/TextTransform.cs index b2a34a1126..932bf63272 100644 --- a/src/Microsoft.ML.Transforms/Text/TextTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/TextTransform.cs @@ -24,11 +24,10 @@ namespace Microsoft.ML.Runtime.Data using StopWordsLang = StopWordsRemoverTransform.Language; using CaseNormalizationMode = TextNormalizerTransform.CaseNormalizationMode; - /// - /// A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are counts - /// of (word or character) ngrams in a given text. It offers ngram hashing (finding the ngram token string name to feature - /// integer index mapping through hashing) as an option. - /// + // A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are counts + // of (word or character) ngrams in a given text. It offers ngram hashing (finding the ngram token string name to feature + // integer index mapping through hashing) as an option. + /// public static class TextTransform { /// diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index 60500d33cc..5afd177763 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ -35,11 +35,10 @@ public interface ITokenizeTransform : IDataTransform { } - /// - /// The input for this transform is a DvText or a vector of DvTexts, and its output is a vector of DvTexts, - /// corresponding to the tokens in the input text, split using a set of user specified separator characters. - /// Empty strings and strings containing only spaces are dropped. - /// + // The input for this transform is a DvText or a vector of DvTexts, and its output is a vector of DvTexts, + // corresponding to the tokens in the input text, split using a set of user specified separator characters. + // Empty strings and strings containing only spaces are dropped. + /// public sealed class DelimitedTokenizeTransform : OneToOneTransformBase, ITokenizeTransform { public class Column : OneToOneColumn diff --git a/src/Microsoft.ML.Transforms/Text/doc.xml b/src/Microsoft.ML.Transforms/Text/doc.xml new file mode 100644 index 0000000000..5f734e1cfd --- /dev/null +++ b/src/Microsoft.ML.Transforms/Text/doc.xml @@ -0,0 +1,188 @@ + + + + + + + A transform that turns a collection of text documents into numerical feature vectors. + The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text. + + + The TextFeaturizer transform gives the user a one-stop solution for doing: + + Language Detection + Tokenization + Text normalization + Predefined and custom stopwords removal. + Word-based or character-based Ngram and SkipGram extraction. + TF, IDF or TF-IDF.
+ L-p vector normalization. + + The TextFeaturizer will show the transformed text after being applied. + It converts a collection of text columns to a matrix of token ngram/skip-gram counts. + Features are made of (word/character) n-grams/skip-grams and the number of features is equal to the vocabulary size found by analyzing the data. + + + + + + pipeline.Add(new TextFeaturizer("Features", "SentimentText") + { + KeepDiacritics = false, + KeepPunctuations = false, + TextCase = TextNormalizerTransformCaseNormalizationMode.Lower, + OutputTokens = true, + StopWordsRemover = new PredefinedStopWordsRemover(), + VectorNormalizer = TextTransformTextNormKind.L2, + CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }, + WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true } + }); + + + + + + + This transform splits the text into words using the separator character(s). + + + The input for this transform is a DvText or a vector of DvTexts, + and its output is a vector of DvTexts, corresponding to the tokens in the input text. + The output is generated by splitting the input text, using a set of user-specified separator characters. + Empty strings and strings containing only spaces are dropped. + This transform is not typically used on its own, but it is one of the transforms composing the Text Featurizer. + + + + + + pipeline.Add(new WordTokenizer("TextColumn") + { + TermSeparators = "' ', '\t', ';'" + }); + + + + + + + This transform produces a bag of counts of n-grams (sequences of consecutive values of length 1-n) in a given vector of keys. + It does so by building a dictionary of n-grams and using the id in the dictionary as the index in the bag. + + + This transform produces a matrix of token ngram/skip-gram counts for a given corpus of text. + The n-grams are represented as count vectors, with vector slots corresponding to n-grams. + Embedding ngrams in a vector space allows their contents to be compared in an efficient manner. + The slot values in the vector can be weighted by the following factors: + + + term frequency + the number of occurrences of the slot in the text + + + inverse document frequency + a ratio (the logarithm of inverse relative slot frequency) + that measures the information a slot provides by determining how common or rare it is across the entire text. + + + term frequency-inverse document frequency + the product of the term frequency and the inverse document frequency. + + + This transform is not typically used on its own, but it is one of the transforms composing the Text Featurizer. + + + + + + + + pipeline.Add(new NGramTranslator("TextColumn") + { + Weighting = NgramTransformWeightingCriteria.TfIdf + }); + + + + + + + Uses a pretrained sentiment model to score input strings. + + + The Sentiment transform returns the probability that the sentiment of a natural text is positive. + + The model was trained with the Sentiment-specific word embedding (SSWE) and NGramFeaturizer on Twitter sentiment data, + similarly to the sentiment analysis part of the + Text Analytics cognitive service. + The transform outputs a score between 0 and 1 as a sentiment prediction + (where 0 is a negative sentiment and 1 is a positive sentiment). + Currently it supports only English. + + + + + + pipeline.Add(new SentimentAnalyzer() + { + Source = "TextColumn" + }); + + + + + + + This transform breaks text into individual tokens, each consisting of an individual character.
+ + + This transform is not typically used on its own, but it is one of the transforms composing the + Text Featurizer. + + + + + + + + pipeline.Add(new CharacterTokenizer("TextCol1", "TextCol2")); + + + + + + + The LDA transform implements LightLDA, a state-of-the-art implementation of Latent Dirichlet Allocation. + + + Latent Dirichlet Allocation is a well-known topic modeling algorithm that infers topical structure from text data, + and can be used to featurize any text fields as low-dimensional topical vectors. + LightLDA is an extremely efficient implementation of LDA developed in MSR-Asia that incorporates a number of + optimization techniques. See LightLDA: Big Topic Models on Modest Computer Clusters. + + + With the LDA transform, ML.NET users can train a topic model to produce 1 million topics over a 1-million-word vocabulary + on a 1-billion-token document set on a single machine in a few hours (typically, LDA at this scale takes days and requires large clusters). + The most significant innovation is a super-efficient O(1) Metropolis-Hastings sampling algorithm, + whose running cost is (surprisingly) agnostic of model size, + allowing it to converge nearly an order of magnitude faster than other Gibbs samplers. + + + For more details please see the original LightLDA paper, and its open source implementation. + + LightLDA: Big Topic Models on Modest Computer Clusters + LightLDA + + + + + + + + pipeline.Add(new LightLda(("InTextCol", "OutTextCol"))); + + + + + + diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index 49cae8ae6e..ad6c31ae96 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -22,28 +22,28 @@ namespace Microsoft.ML.Runtime.Data { - /// - /// This can be thought of as an inverse of . For all specified vector columns - /// ("pivot" columns), performs the "ungroup" (or "unroll") operation as outlined below. - /// - /// If the only pivot column is called P, and has size K, then for every row of the input we will produce - /// K rows, that are identical in all columns except P.
The column P will become a scalar column, and this + // column will hold all the original values of input's P, one value per row, in order. The order of columns + // will remain the same. + // + // Variable-length pivot columns are supported (including zero, which will eliminate the row from the result). + // + // Multiple pivot columns are also supported: + // * A number of output rows is controlled by the 'mode' parameter. + // - outer: it is equal to the maximum length of pivot columns, + // - inner: it is equal to the minimum length of pivot columns, + // - first: it is equal to the length of the first pivot column. + // * If a particular pivot column has size that is different than the number of output rows, the extra slots will + // be ignored, and the missing slots will be 'padded' with default values. + // + // All metadata is preserved for the retained columns. For 'unrolled' columns, all known metadata + // except slot names is preserved. + /// public sealed class UngroupTransform : TransformBase { public const string Summary = "Un-groups vector columns into sequences of rows, inverse of Group transform"; @@ -653,7 +653,12 @@ private ValueGetter MakeGetter(int col, PrimitiveType itemType) public static partial class GroupingOperations { - [TlcModule.EntryPoint(Name = "Transforms.Segregator", Desc = UngroupTransform.Summary, UserName = UngroupTransform.UserName, ShortName = UngroupTransform.ShortName)] + [TlcModule.EntryPoint(Name = "Transforms.Segregator", + Desc = UngroupTransform.Summary, + UserName = UngroupTransform.UserName, + ShortName = UngroupTransform.ShortName, + XmlInclude = new[] { @"", + @""})] public static CommonOutputs.TransformOutput Ungroup(IHostEnvironment env, UngroupTransform.Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/WhiteningTransform.cs b/src/Microsoft.ML.Transforms/WhiteningTransform.cs index 8b36078a04..be83e87c89 100644 --- a/src/Microsoft.ML.Transforms/WhiteningTransform.cs +++ b/src/Microsoft.ML.Transforms/WhiteningTransform.cs @@ -42,7 +42,7 @@ public enum WhiteningKind /// That is, PCA whitening is essentially just a PCA + rescale. /// ZCA whitening tries to make resulting data to look more like input data by rotating it back to the /// original input space. - /// More information: + /// More information: http://ufldl.stanford.edu/wiki/index.php/Whitening /// public sealed class WhiteningTransform : OneToOneTransformBase { diff --git a/src/Microsoft.ML.Transforms/doc.xml b/src/Microsoft.ML.Transforms/doc.xml index 7482c3a272..cb6ef6af25 100644 --- a/src/Microsoft.ML.Transforms/doc.xml +++ b/src/Microsoft.ML.Transforms/doc.xml @@ -1,5 +1,5 @@  - + @@ -11,12 +11,19 @@ value and using the hash as an index in the bag. If the input column is a vector, a single indicator bag is returned for it. + + - - pipeline.Add(new CategoricalHashOneHotVectorizer("Text1") { HashBits = 10, Seed = 314489979, OutputKind = CategoricalTransformOutputKind.Bag }); + + pipeline.Add(new CategoricalHashOneHotVectorizer("Text1") + { + HashBits = 10, + Seed = 314489979, + OutputKind = CategoricalTransformOutputKind.Bag + }); - + @@ -39,14 +46,347 @@ for Ind they are concatenated and for Bag they are added. When the source column is a singleton, the Ind and Bag options are identical. 
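A small worked illustration of the Ind/Bag difference described above (the dictionary and values are made up): with a learned dictionary {A, B} and an input vector ["A", "B", "A"], Ind emits one indicator vector per slot and concatenates them, giving [1,0, 0,1, 1,0], while Bag sums those same indicator vectors into a single count vector, giving [2, 1]. For a scalar input such as "A", both produce [1, 0]. In code form (illustrative only):

// The Bag output is the slot-wise sum of the one-hot vectors that Ind concatenates.
int[] ind = { 1, 0, 0, 1, 1, 0 };         // one-hot vectors for "A", "B", "A" over {A, B}
int[] bag = { ind[0] + ind[2] + ind[4],   // count of "A" = 2
              ind[1] + ind[3] + ind[5] }; // count of "B" = 1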
+ + - An example of how to add the CategoricalOneHotVectorizer transform to a pipeline with two text columns named "Text1" and "Text2". - + pipeline.Add(new CategoricalOneHotVectorizer("Text1", "Text2")); + + + + + Selects the slots for which the count of non-default values is greater than or equal to a threshold. + + + + This transform uses a set of aggregators to count the number of non-default values for each slot and + instantiates a DropSlots transform to actually drop the slots. + This transform is useful when applied together with a hashing transform. + The count feature selection can remove those features generated by the hash transform that have no data in the examples. + + + + + + + pipeline.Add(new FeatureSelectorByCount + { + Column = new[]{ "Feature1" }, + Count = 2 + }); + + + + + + + Selects the top k slots across all specified columns ordered by their mutual information with the label column. + + + + The mutual information of two random variables X and Y is a measure of the mutual dependence between the variables. + Formally, the mutual information can be written as: + + I(X;Y) = E[log(p(x,y)) - log(p(x)) - log(p(y))] + where the expectation is taken over the joint distribution of X and Y. + Here p(x,y) is the joint probability density function of X and Y, and p(x) and p(y) are the marginal probability density functions of X and Y respectively. + In general, a higher mutual information between the dependent variable (or label) and an independent variable (or feature) means + that the label has a higher mutual dependence on that feature. + The transform keeps the top SlotsInOutput features with the largest mutual information with the label. + + + + + + + pipeline.Add(new FeatureSelectorByMutualInformation + { + Column = new[]{ "Feature1" }, + SlotsInOutput = 6 + }); + + + + + + + Creates a new column with the specified type and default values. + + + If the user wishes to create additional columns with a particular type and default values, + or to replicate the values from one column to another while changing their type, they can do so using this transform. + This transform can be used as a workaround to create a Label column after deserializing a model, for prediction. + Some transforms in the serialized model operate on the Label column, and would throw errors during prediction if such a column is not found. + + + + + + pipeline.Add(new OptionalColumnCreator + { + Column = new[]{ "OptColumn" } + }); + + + + + + + Converts multiple column values into hashes. + This transform accepts both numeric and text inputs, both single and vector-valued columns. + + + This transform can be helpful for ranking and cross-validation. In the case of ranking, where the GroupIdColumn is required + and needs to be of a key type, you can use the HashConverter to hash the text value of a single GroupID column into a key value. + If the GroupID is the combination of values from multiple columns, you can use the HashConverter to hash multiple text columns into one key column. + Similarly for the CrossValidator and the StratificationColumn. + + + + + + pipeline.Add(new HashConverter("Column1", "Column2")); + + + + + + + Removes missing values from vector type columns. + + + + + + + pipeline.Add(new MissingValuesDropper("Column1")); + + + + + + + This transform can transform either scalars or vectors (both fixed and variable size), + creating output columns that indicate, through true/false booleans, whether the row has a missing value.
+ + + + + + + pipeline.Add(new MissingValueIndicator("Column1")); + + + + + + + Create an output column of the same type and size as the input column, + where missing values are replaced with either the default value or the mean/min/max value (for non-text columns only). + + + This transform can transform either scalars or vectors (both fixed and variable size), + creating output columns that are identical to the input columns except for replacing NA values + with either the default value, user input, or imputed values (min/max/mean are currently supported). + Imputation modes are supported for vectors both by slot and across all slots. + + + + + + + pipeline.Add(new MissingValueSubstitutor("FeatureCol") + { + ReplacementKind = NAReplaceTransformReplacementKind.Mean + }); + + + + + + + Handle missing values by replacing them with either the default value or the indicated value. + + + This transform handles missing values in the input columns. For each input column, it creates an output column + where the missing values are replaced by one of these specified values: + + + The default value of the appropriate type. + + + The mean value of the appropriate type. + + + The max value of the appropriate type. + + + The min value of the appropriate type. + + + The last three work only for numeric, TimeSpan, or DateTime columns. + + The output column can also optionally include an indicator vector for which slots were missing in the input column. + This can be done only when the indicator vector type can be converted to the input column type, i.e. only for numeric columns. + + + When computing the mean/max/min value, there is also an option to compute it over the whole column instead of per slot. + This option has a default value of true for variable length vectors, and false for known length vectors. + It can be changed to true for known length vectors, but it results in an error if changed to false for variable length vectors. + + + + + + + + + pipeline.Add(new MissingValueHandler("FeatureCol", "CleanFeatureCol") + { + ReplaceWith = NAHandleTransformReplacementKind.Mean + }); + + + + + + + The LpNormalizer transform normalizes vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). + Performs the following operation on a vector X: + Y = (X - M) / D + where M is mean and D is either the L2 norm, L1 norm or LInf norm. + + + Scaling inputs to unit norms is a common operation for text classification or clustering. + For more information see: + + + + + pipeline.Add(new LpNormalizer("FeatureCol") + { + NormKind = LpNormNormalizerTransformNormalizerKind.L1Norm + }); + + + + + + + Performs a global contrast normalization on input values: + Y = (s * X - M) / D + where s is a scale, M is mean and D is either the L2 norm or standard deviation. + + + Scaling inputs to unit norms is a common operation for text classification or clustering. + For more information see: + + + + + pipeline.Add(new GlobalContrastNormalizer("FeatureCol") + { + SubMean = false + }); + + + + + + + Un-groups vector columns into sequences of rows, inverse of Group transform. + + + This can be thought of as an inverse of the GroupTransform. + For all specified vector columns ("pivot" columns), performs the "ungroup" (or "unroll") operation as outlined below. + + If the only pivot column is called P, and has size K, then for every row of the input we will produce + K rows, that are identical in all columns except P.
The column P will become a scalar column, and this + column will hold all the original values of input's P, one value per row, in order. The order of columns + will remain the same. + + Variable-length pivot columns are supported (including zero, which will eliminate the row from the result). + Multiple pivot columns are also supported: + + The number of output rows is controlled by the 'mode' parameter. + + outer: it is equal to the maximum length of pivot columns + inner: it is equal to the minimum length of pivot columns + first: it is equal to the length of the first pivot column + + + + + If a particular pivot column has a size different from the number of output rows, the extra slots will + be ignored, and the missing slots will be 'padded' with default values. + + + All metadata is preserved for the retained columns. For 'unrolled' columns, all known metadata + except slot names is preserved. + + + + + + + pipeline.Add(new Segregator + { + Column = new[]{ "Column1" }, + Mode = UngroupTransformUngroupMode.First + }); + + + + + + + Helps retrieve the original values from a key column. + + + The KeyToTextConverter is the complement of the TextToKeyConverter transform. + Since key values are an enumeration into the set of keys, most transforms that produce key-valued outputs + corresponding to input values will often, wherever possible, associate a piece of KeyValue metadata with that dataset. + Transforming values into a categorical variable would be of limited use if we couldn't somehow backtrack + to figure out what those categories actually mean. + The KeyToTextConverter enables that functionality. + + + + + + pipeline.Add(new KeyToTextConverter(("InColumn", "OutColumn"))); + + + + + + + Groups values of a scalar column into a vector, by a contiguous group ID. + + + The CombinerByContiguousGroupId transform groups the consecutive rows that share the specified group key (or keys). + Both group keys and the aggregated values can be of arbitrary non-vector types. + The resulting data will have all the group key columns preserved, + and the aggregated columns will become variable-length vectors of the original types. + This transform essentially performs the following SQL-like operation: + SELECT GroupKey1, GroupKey2, ... GroupKeyK, LIST(Value1), LIST(Value2), ... LIST(ValueN) + FROM Data + GROUP BY GroupKey1, GroupKey2, ... GroupKeyK.
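As a hedged illustration of those SQL-like semantics (not the actual implementation, and with hypothetical row and property names), the in-memory equivalent looks roughly like this LINQ query. One caveat: LINQ's GroupBy groups globally, while the transform only combines consecutive rows sharing a key, so equal keys must already be adjacent for the two to agree.

using System.Linq;

// 'rows' is an in-memory sequence with Key and Value properties (hypothetical).
var combined = rows
    .GroupBy(r => r.Key)                 // the transform assumes equal keys are contiguous
    .Select(g => new { g.Key, Values = g.Select(r => r.Value).ToArray() });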
+ + + + + pipeline.Add(new CombinerByContiguousGroupId + { + GroupKey = new []{"Key1", "Key2" } + }); + + - + - \ No newline at end of file + \ No newline at end of file diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 83723638e1..b6ce7778e1 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -3080,9 +3080,7 @@ public sealed partial class OneVersusAllMacroSubGraphOutput } - /// - /// One-vs-All macro (OVA) - /// + /// public sealed partial class OneVersusAll : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -4135,7 +4133,8 @@ public sealed class Output namespace Trainers { - /// + /// + /// public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -4639,7 +4638,8 @@ public enum Bundle : byte } - /// + /// + /// public sealed partial class FastForestBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -4930,7 +4930,8 @@ public FastForestBinaryClassifierPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class FastForestRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -5217,7 +5218,8 @@ public enum BoostedTreeArgsOptimizationAlgorithmType } - /// + /// + /// public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -5606,7 +5608,8 @@ public FastTreeBinaryClassifierPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -6030,7 +6033,8 @@ public FastTreeRankerPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -6414,7 +6418,7 @@ public FastTreeRegressorPipelineStep(Output output) namespace Trainers { - /// + /// public sealed partial class 
FastTreeTweedieRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -6803,7 +6807,8 @@ public FastTreeTweedieRegressorPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class FieldAwareFactorizationMachineBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -7230,7 +7235,8 @@ public enum KMeansPlusPlusTrainerInitAlgorithm } - /// + /// + /// public sealed partial class KMeansPlusPlusClusterer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.IUnsupervisedTrainerWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -7346,7 +7352,8 @@ public enum LightGbmArgumentsEvalMetricType } - /// + /// + /// public sealed partial class LightGbmBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -7549,7 +7556,8 @@ public LightGbmBinaryClassifierPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class LightGbmClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -7752,7 +7760,8 @@ public LightGbmClassifierPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class LightGbmRanker : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -7955,7 +7964,8 @@ public LightGbmRankerPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class LightGbmRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -8293,8 +8303,8 @@ public LinearSvmBinaryClassifierPipelineStep(Output output) namespace Trainers { - /// - /// + /// + /// public sealed partial class LogisticRegressionBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -8442,8 +8452,8 @@ public LogisticRegressionBinaryClassifierPipelineStep(Output output) namespace Trainers { - /// - /// + /// + /// public sealed partial class LogisticRegressionClassifier : 
Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -8591,9 +8601,8 @@ public LogisticRegressionClassifierPipelineStep(Output output) namespace Trainers { - /// - /// Train a MultiClassNaiveBayesTrainer. - /// + /// + /// public sealed partial class NaiveBayesClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -8664,7 +8673,8 @@ public NaiveBayesClassifierPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -8817,7 +8827,8 @@ public OnlineGradientDescentRegressorPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class PcaAnomalyDetector : Microsoft.ML.Runtime.EntryPoints.CommonInputs.IUnsupervisedTrainerWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -8911,7 +8922,8 @@ public PcaAnomalyDetectorPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class PoissonRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -9054,7 +9066,8 @@ public PoissonRegressorPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -9193,7 +9206,8 @@ public StochasticDualCoordinateAscentBinaryClassifierPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -9316,7 +9330,8 @@ public StochasticDualCoordinateAscentClassifierPipelineStep(Output output) namespace Trainers { - /// + /// + /// public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem { @@ -9919,7 +9934,8 @@ public sealed partial class CategoricalHashTransformColumn : OneToOneColumn + /// + /// public sealed partial class CategoricalHashOneHotVectorizer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -10092,7 +10108,8 @@ public sealed partial class CategoricalTransformColumn : OneToOneColumn + /// + /// public sealed partial class CategoricalOneHotVectorizer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -10234,9 +10251,7 @@ public sealed partial class CharTokenizeTransformColumn : OneToOneColumn - /// Character-oriented tokenizer where text is considered a sequence of 
characters. - /// + /// public sealed partial class CharacterTokenizer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -10827,9 +10842,7 @@ public ColumnTypeConverterPipelineStep(Output output) namespace Transforms { - /// - /// Groups values of a scalar column into a vector, by a contiguous group ID - /// + /// public sealed partial class CombinerByContiguousGroupId : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -11404,9 +11417,8 @@ public FeatureCombinerPipelineStep(Output output) namespace Transforms { - /// - /// Selects the slots for which the count of non-default values is greater than or equal to a threshold. - /// + /// + /// public sealed partial class FeatureSelectorByCount : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -11474,9 +11486,8 @@ public FeatureSelectorByCountPipelineStep(Output output) namespace Transforms { - /// - /// Selects the top k slots across all specified columns ordered by their mutual information with the label column. - /// + /// + /// public sealed partial class FeatureSelectorByMutualInformation : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -11583,9 +11594,7 @@ public sealed partial class LpNormNormalizerTransformGcnColumn : OneToOneColumn< } - /// - /// Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. - /// + /// public sealed partial class GlobalContrastNormalizer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -11742,9 +11751,8 @@ public sealed partial class HashJoinTransformColumn : OneToOneColumn - /// Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. This is a part of the Dracula transform. - /// + /// + /// public sealed partial class HashConverter : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -11881,9 +11889,7 @@ public sealed partial class KeyToValueTransformColumn : OneToOneColumn - /// KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata. - /// + /// public sealed partial class KeyToTextConverter : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -12319,9 +12325,8 @@ public sealed partial class LdaTransformColumn : OneToOneColumn - /// The LDA transform implements LightLDA, a state-of-the-art implementation of Latent Dirichlet Allocation. - /// + /// + /// public sealed partial class LightLda : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -12656,9 +12661,7 @@ public sealed partial class LpNormNormalizerTransformColumn : OneToOneColumn - /// Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. 
- /// + /// public sealed partial class LpNormalizer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -13075,9 +13078,8 @@ public sealed partial class NAHandleTransformColumn : OneToOneColumn - /// Handle missing values by replacing them with either the default value or the mean/min/max value (for non-text columns only). An indicator column can optionally be concatenated, if the input column type is numeric. - /// + /// + /// public sealed partial class MissingValueHandler : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -13209,9 +13211,8 @@ public sealed partial class NAIndicatorTransformColumn : OneToOneColumn - /// Create a boolean output column with the same number of slots as the input column, where the output value is true if the value in the input column is missing. - /// + /// + /// public sealed partial class MissingValueIndicator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -13328,9 +13329,8 @@ public sealed partial class NADropTransformColumn : OneToOneColumn - /// Removes NAs from vector columns. - /// + /// + /// public sealed partial class MissingValuesDropper : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -13433,9 +13433,8 @@ public MissingValuesDropperPipelineStep(Output output) namespace Transforms { - /// - /// Filters out rows that contain missing values. - /// + /// + /// public sealed partial class MissingValuesRowDropper : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -13541,9 +13540,8 @@ public sealed partial class NAReplaceTransformColumn : OneToOneColumn - /// Create an output column of the same type and size of the input column, where missing values are replaced with either the default value or the mean/min/max value (for non-text columns only). - /// + /// + /// public sealed partial class MissingValueSubstitutor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -13729,9 +13727,7 @@ public sealed partial class NgramTransformColumn : OneToOneColumn - /// Produces a bag of counts of ngrams (sequences of consecutive values of length 1-n) in a given vector of keys. It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag. - /// + /// public sealed partial class NGramTranslator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -13919,9 +13915,8 @@ public NoOperationPipelineStep(Output output) namespace Transforms { - /// - /// If the source column does not exist after deserialization, create a column with the right type and default values.
- /// + /// + /// public sealed partial class OptionalColumnCreator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -14023,7 +14018,8 @@ public sealed partial class PcaTransformColumn : OneToOneColumn + /// + /// public sealed partial class PcaCalculator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -14704,9 +14700,8 @@ public enum UngroupTransformUngroupMode } - /// - /// Un-groups vector columns into sequences of rows, inverse of Group transform - /// + /// + /// public sealed partial class Segregator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -14774,9 +14769,8 @@ public SegregatorPipelineStep(Output output) namespace Transforms { - /// - /// Uses a pretrained sentiment model to score input strings - /// + /// + /// public sealed partial class SentimentAnalyzer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -15033,9 +15027,8 @@ public sealed partial class TermLoaderArguments } - /// - /// A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text. - /// + /// + /// public sealed partial class TextFeaturizer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -15170,9 +15163,8 @@ public TextFeaturizerPipelineStep(Output output) namespace Transforms { - /// - /// Converts input values (words, numbers, etc.) to index in a dictionary. - /// + /// + /// public sealed partial class TextToKeyConverter : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -15337,9 +15329,7 @@ public sealed class Output namespace Transforms { - /// - /// Trains a tree ensemble, or loads it from a file, then maps a numeric feature vector to three outputs: 1. A vector containing the individual tree outputs of the tree ensemble. 2. A vector indicating the leaves that the feature vector falls on in the tree ensemble. 3. A vector indicating the paths that the feature vector falls on in the tree ensemble. If both a model file and a trainer are specified - will use the model file. If neither is specified, will train a default FastTree model. This can handle key labels by training a regression model towards their optionally permuted indices. - /// + /// public sealed partial class TreeLeafFeaturizer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.IFeaturizerInput, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { @@ -15463,9 +15453,8 @@ public sealed partial class DelimitedTokenizeTransformColumn : OneToOneColumn - /// The input to this transform is text, and the output is a vector of text containing the words (tokens) in the original text. The separator is space, but can be specified as any other character (or multiple characters) if needed.
- /// + /// + /// public sealed partial class WordTokenizer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.ILearningPipelineItem { diff --git a/src/Microsoft.ML/Models/OnnxConverter.cs b/src/Microsoft.ML/Models/OnnxConverter.cs index f9c1b9dc0a..6a98d9faee 100644 --- a/src/Microsoft.ML/Models/OnnxConverter.cs +++ b/src/Microsoft.ML/Models/OnnxConverter.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML.Models public sealed partial class OnnxConverter { /// - /// ONNX is an intermediate representation format + /// ONNX is an intermediate representation format /// for machine learning models. It is used to make models portable such that you can /// train a model using a toolkit and run it in another toolkit's runtime, for example, /// you can create a model using ML.NET, export it to an ONNX-ML model file, diff --git a/src/Microsoft.ML/Runtime/EntryPoints/OneVersusAllMacro.cs b/src/Microsoft.ML/Runtime/EntryPoints/OneVersusAllMacro.cs index e4a54de040..494fe6b225 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/OneVersusAllMacro.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/OneVersusAllMacro.cs @@ -55,13 +55,13 @@ private static Tuple, Var> ProcessClass(IH ClassIndex = k, Column = new[] { - new ML.Transforms.LabelIndicatorTransformColumn - { - ClassIndex = k, - Name = label, - Source = label - } - }, + new ML.Transforms.LabelIndicatorTransformColumn + { + ClassIndex = k, + Name = label, + Source = label + } + }, Data = { VarName = node.GetInputVariable(nameof(input.TrainingData)).ToJson() } }; var exp = new Experiment(env); @@ -134,7 +134,9 @@ private static int GetNumberOfClasses(IHostEnvironment env, Arguments input, out } } - [TlcModule.EntryPoint(Desc = "One-vs-All macro (OVA)", Name = "Models.OneVersusAll")] + [TlcModule.EntryPoint(Desc = "One-vs-All macro (OVA)", + Name = "Models.OneVersusAll", + XmlInclude = new[] { @"" })] public static CommonOutputs.MacroOutput OVA( IHostEnvironment env, Arguments input,
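
To make the OnnxConverter summary above concrete, here is a minimal usage sketch against the legacy LearningPipeline API of this era. It is a sketch only: the Onnx, Json, and Domain property names, the Convert method, the PredictionModel parameter type, and the file paths are assumptions drawn from the generated CSharpApi.cs surface of that API, not part of this diff; verify them against the generated code.

using Microsoft.ML;
using Microsoft.ML.Models;

public static class OnnxExportSketch
{
    // Hedged sketch: export a trained legacy-API model to an ONNX-ML file.
    // `model` is assumed to come from LearningPipeline.Train<TIn, TOut>() elsewhere.
    public static void Export(PredictionModel model)
    {
        var converter = new OnnxConverter
        {
            Onnx = "model.onnx",       // assumed output path for the binary ONNX-ML protobuf
            Json = "model.onnx.json",  // assumed optional human-readable JSON rendering
            Domain = "com.example"     // hypothetical ONNX domain name
        };
        converter.Convert(model);      // assumed conversion entry point on OnnxConverter
    }
}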
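
Likewise, the Models.OneVersusAll entry point that gains an XmlInclude above is normally consumed through the generated OneVersusAll wrapper. The sketch below shows the usual pattern; the OneVersusAll.With(...) factory and the SDCA binary trainer are assumptions based on the same generated API, and the loader and feature transforms are elided.

using Microsoft.ML;
using Microsoft.ML.Models;
using Microsoft.ML.Trainers;

public static class OvaUsageSketch
{
    // Hedged sketch: wrap a binary learner in the One-vs-All (OVA) macro so it
    // can be trained against a multi-class label via the legacy pipeline API.
    public static LearningPipeline Build()
    {
        var pipeline = new LearningPipeline();
        // ... add a loader and feature transforms for a multi-class dataset here ...
        pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier()));
        return pipeline;
    }
}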