Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -629,9 +629,9 @@ var trainData = mlContext.Data.ReadFromTextFile<IrisInputAllFeatures>(dataPath,
// Apply all kinds of standard ML.NET normalization to the raw features.
var pipeline =
mlContext.Transforms.Normalize(
new NormalizingEstimator.MinMaxColumn("MinMaxNormalized", "Features", fixZero: true),
new NormalizingEstimator.MeanVarColumn("MeanVarNormalized", "Features", fixZero: true),
new NormalizingEstimator.BinningColumn("BinNormalized", "Features", numBins: 256));
new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true),
new NormalizingEstimator.MeanVarColumnOptions("MeanVarNormalized", "Features", fixZero: true),
new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", numBins: 256));

// Let's train our pipeline of normalizers, and then apply it to the same data.
var normalizedData = pipeline.Fit(trainData).Transform(trainData);
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public static void Example()

// Composing a different pipeline if we wanted to normalize more than one column at a time.
// Using log scale as the normalization mode.
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new SimpleColumnInfo[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new ColumnOptions[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
// The transformed data.
var multiColtransformer = multiColPipeline.Fit(trainData);
var multiColtransformedData = multiColtransformer.Transform(trainData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public static void Example()
};

var engine = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text")
.Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new SimpleColumnInfo[] { ("VariableLenghtFeatures", "TokenizedWords") }))
.Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new ColumnOptions[] { ("VariableLenghtFeatures", "TokenizedWords") }))
.Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize"))
.Append(mlContext.Transforms.ScoreTensorFlowModel(modelInfo, new[] { "Prediction/Softmax" }, new[] { "Features" }))
.Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax")))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

namespace Microsoft.ML.Samples.Dynamic
{
public sealed class VectorWhitenWithColumnInfo
public sealed class VectorWhitenWithColumnOptions
{
/// This example requires installation of the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
public static void Example()
Expand Down Expand Up @@ -39,7 +39,7 @@ public static void Example()


// A pipeline to project the Features column into a white noise vector.
var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(new Transforms.Projections.VectorWhiteningEstimator.ColumnInfo(
var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(new Transforms.Projections.VectorWhiteningEstimator.ColumnOptions(
nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.Projections.WhiteningKind.Pca, pcaNum: 4));
// The transformed (projected) data.
var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/TrainCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -226,12 +226,12 @@ private void EnsureGroupPreservationColumn(ref IDataView data, ref string sampli
// Generate a new column with the hashed samplingKeyColumn.
while (data.Schema.TryGetColumnIndex(samplingKeyColumn, out tmp))
samplingKeyColumn = string.Format("{0}_{1:000}", origStratCol, ++inc);
HashingEstimator.ColumnInfo columnInfo;
HashingEstimator.ColumnOptions columnOptions;
if (seed.HasValue)
columnInfo = new HashingEstimator.ColumnInfo(samplingKeyColumn, origStratCol, 30, seed.Value);
columnOptions = new HashingEstimator.ColumnOptions(samplingKeyColumn, origStratCol, 30, seed.Value);
else
columnInfo = new HashingEstimator.ColumnInfo(samplingKeyColumn, origStratCol, 30);
data = new HashingEstimator(Environment, columnInfo).Fit(data).Transform(data);
columnOptions = new HashingEstimator.ColumnOptions(samplingKeyColumn, origStratCol, 30);
data = new HashingEstimator(Environment, columnOptions).Fit(data).Transform(data);
}
}
}
Expand Down
52 changes: 26 additions & 26 deletions src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ internal sealed class TaggedOptions
}

[BestFriend]
internal sealed class ColumnInfo
internal sealed class ColumnOptions
{
public readonly string Name;
private readonly (string name, string alias)[] _sources;
Expand All @@ -143,7 +143,7 @@ internal sealed class ColumnInfo
/// <summary>
/// This denotes a concatenation of all <paramref name="inputColumnNames"/> into a column called <paramref name="name"/>.
/// </summary>
public ColumnInfo(string name, params string[] inputColumnNames)
public ColumnOptions(string name, params string[] inputColumnNames)
: this(name, GetPairs(inputColumnNames))
{
}
Expand All @@ -159,7 +159,7 @@ public ColumnInfo(string name, params string[] inputColumnNames)
/// For each input column, an 'alias' can be specified, to be used in constructing the resulting slot names.
/// If the alias is not specified, it defaults to the column name.
/// </summary>
public ColumnInfo(string name, IEnumerable<(string name, string alias)> inputColumnNames)
public ColumnOptions(string name, IEnumerable<(string name, string alias)> inputColumnNames)
{
Contracts.CheckNonEmpty(name, nameof(name));
Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
Expand Down Expand Up @@ -195,7 +195,7 @@ public void Save(ModelSaveContext ctx)
}
}

internal ColumnInfo(ModelLoadContext ctx)
internal ColumnOptions(ModelLoadContext ctx)
{
Contracts.AssertValue(ctx);
// *** Binary format ***
Expand All @@ -218,7 +218,7 @@ internal ColumnInfo(ModelLoadContext ctx)
}
}

private readonly ColumnInfo[] _columns;
private readonly ColumnOptions[] _columns;

/// <summary>
/// The names of the output and input column pairs for the transformation.
Expand All @@ -232,14 +232,14 @@ internal ColumnInfo(ModelLoadContext ctx)
/// The column types must match, and the output column type is always a vector.
/// </summary>
internal ColumnConcatenatingTransformer(IHostEnvironment env, string outputColumnName, params string[] inputColumnNames)
: this(env, new ColumnInfo(outputColumnName, inputColumnNames))
: this(env, new ColumnOptions(outputColumnName, inputColumnNames))
{
}

/// <summary>
/// Concatenates multiple groups of columns, each group is denoted by one of <paramref name="columns"/>.
/// </summary>
internal ColumnConcatenatingTransformer(IHostEnvironment env, params ColumnInfo[] columns) :
internal ColumnConcatenatingTransformer(IHostEnvironment env, params ColumnOptions[] columns) :
base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ColumnConcatenatingTransformer)))
{
Contracts.CheckValue(columns, nameof(columns));
Expand Down Expand Up @@ -272,7 +272,7 @@ private protected override void SaveModel(ModelSaveContext ctx)
// *** Binary format ***
// int: number of columns
// for each column:
// columnInfo
// columnOptions

Contracts.Assert(_columns.Length > 0);
ctx.Writer.Write(_columns.Length);
Expand All @@ -293,18 +293,18 @@ private ColumnConcatenatingTransformer(IHostEnvironment env, ModelLoadContext ct
// *** Binary format ***
// int: number of columns
// for each column:
// columnInfo
// columnOptions
int n = ctx.Reader.ReadInt32();
Contracts.CheckDecode(n > 0);
_columns = new ColumnInfo[n];
_columns = new ColumnOptions[n];
for (int i = 0; i < n; i++)
_columns[i] = new ColumnInfo(ctx);
_columns[i] = new ColumnOptions(ctx);
}
else
_columns = LoadLegacy(ctx);
}

private ColumnInfo[] LoadLegacy(ModelLoadContext ctx)
private ColumnOptions[] LoadLegacy(ModelLoadContext ctx)
{
// *** Legacy binary format ***
// int: sizeof(Float).
Expand Down Expand Up @@ -359,9 +359,9 @@ private ColumnInfo[] LoadLegacy(ModelLoadContext ctx)
}
}

var result = new ColumnInfo[n];
var result = new ColumnOptions[n];
for (int i = 0; i < n; i++)
result[i] = new ColumnInfo(names[i],
result[i] = new ColumnOptions(names[i],
inputs[i].Zip(aliases[i], (name, alias) => (name, alias)));
return result;
}
Expand All @@ -380,7 +380,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa
env.CheckUserArg(Utils.Size(options.Columns[i].Source) > 0, nameof(options.Columns));

var cols = options.Columns
.Select(c => new ColumnInfo(c.Name, c.Source))
.Select(c => new ColumnOptions(c.Name, c.Source))
.ToArray();
var transformer = new ColumnConcatenatingTransformer(env, cols);
return transformer.MakeDataTransform(input);
Expand All @@ -400,7 +400,7 @@ internal static IDataTransform Create(IHostEnvironment env, TaggedOptions option
env.CheckUserArg(Utils.Size(options.Columns[i].Source) > 0, nameof(options.Columns));

var cols = options.Columns
.Select(c => new ColumnInfo(c.Name, c.Source.Select(kvp => (kvp.Value, kvp.Key != "" ? kvp.Key : null))))
.Select(c => new ColumnOptions(c.Name, c.Source.Select(kvp => (kvp.Value, kvp.Key != "" ? kvp.Key : null))))
.ToArray();
var transformer = new ColumnConcatenatingTransformer(env, cols);
return transformer.MakeDataTransform(input);
Expand Down Expand Up @@ -526,7 +526,7 @@ private sealed class BoundColumn
{
public readonly int[] SrcIndices;

private readonly ColumnInfo _columnInfo;
private readonly ColumnOptions _columnOptions;
private readonly DataViewType[] _srcTypes;

public readonly VectorType OutputType;
Expand All @@ -542,10 +542,10 @@ private sealed class BoundColumn

private readonly DataViewSchema _inputSchema;

public BoundColumn(DataViewSchema inputSchema, ColumnInfo columnInfo, int[] sources, VectorType outputType,
public BoundColumn(DataViewSchema inputSchema, ColumnOptions columnOptions, int[] sources, VectorType outputType,
bool isNormalized, bool hasSlotNames, bool hasCategoricals, int slotCount, int catCount)
{
_columnInfo = columnInfo;
_columnOptions = columnOptions;
SrcIndices = sources;
_srcTypes = sources.Select(c => inputSchema[c].Type).ToArray();

Expand All @@ -570,7 +570,7 @@ public DataViewSchema.DetachedColumn MakeSchemaColumn()
if (_isIdentity)
{
var inputCol = _inputSchema[SrcIndices[0]];
return new DataViewSchema.DetachedColumn(_columnInfo.Name, inputCol.Type, inputCol.Annotations);
return new DataViewSchema.DetachedColumn(_columnOptions.Name, inputCol.Type, inputCol.Annotations);
}

var metadata = new DataViewSchema.Annotations.Builder();
Expand All @@ -581,7 +581,7 @@ public DataViewSchema.DetachedColumn MakeSchemaColumn()
if (_hasCategoricals)
metadata.Add(AnnotationUtils.Kinds.CategoricalSlotRanges, _categoricalRangeType, (ValueGetter<VBuffer<int>>)GetCategoricalSlotRanges);

return new DataViewSchema.DetachedColumn(_columnInfo.Name, OutputType, metadata.ToAnnotations());
return new DataViewSchema.DetachedColumn(_columnOptions.Name, OutputType, metadata.ToAnnotations());
}

private void GetIsNormalized(ref bool value) => value = _isNormalized;
Expand Down Expand Up @@ -630,9 +630,9 @@ private void GetSlotNames(ref VBuffer<ReadOnlyMemory<char>> dst)
{
int colSrc = SrcIndices[i];
var typeSrc = _srcTypes[i];
Contracts.Assert(_columnInfo.Sources[i].alias != "");
Contracts.Assert(_columnOptions.Sources[i].alias != "");
var colName = _inputSchema[colSrc].Name;
var nameSrc = _columnInfo.Sources[i].alias ?? colName;
var nameSrc = _columnOptions.Sources[i].alias ?? colName;
if (!(typeSrc is VectorType vectorTypeSrc))
{
bldr.AddFeature(slot++, nameSrc.AsMemory());
Expand All @@ -650,7 +650,7 @@ private void GetSlotNames(ref VBuffer<ReadOnlyMemory<char>> dst)
{
inputMetadata.GetValue(AnnotationUtils.Kinds.SlotNames, ref names);
sb.Clear();
if (_columnInfo.Sources[i].alias != colName)
if (_columnOptions.Sources[i].alias != colName)
sb.Append(nameSrc).Append(".");
int len = sb.Length;
foreach (var kvp in names.Items())
Expand Down Expand Up @@ -801,15 +801,15 @@ private Delegate MakeGetter<T>(DataViewRow input)
public KeyValuePair<string, JToken> SavePfaInfo(BoundPfaContext ctx)
{
Contracts.AssertValue(ctx);
string outName = _columnInfo.Name;
string outName = _columnOptions.Name;
if (!OutputType.IsKnownSize) // Do not attempt variable length.
return new KeyValuePair<string, JToken>(outName, null);

string[] srcTokens = new string[SrcIndices.Length];
bool[] srcPrimitive = new bool[SrcIndices.Length];
for (int i = 0; i < SrcIndices.Length; ++i)
{
var srcName = _columnInfo.Sources[i].name;
var srcName = _columnOptions.Sources[i].name;
if ((srcTokens[i] = ctx.TokenOrNullForName(srcName)) == null)
return new KeyValuePair<string, JToken>(outName, null);
srcPrimitive[i] = _srcTypes[i] is PrimitiveDataViewType;
Expand Down
Loading