From 39f91250b3497340ea7241ce15e9d16d1e3b175d Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Fri, 22 Feb 2019 18:05:56 -0800 Subject: [PATCH 1/3] ColumnInfo renamed to ColumnOptions --- .../Dynamic/Normalizer.cs | 2 +- .../Dynamic/TensorFlow/TextClassification.cs | 2 +- .../Projection/VectorWhitenWithColumnInfo.cs | 4 +- src/Microsoft.ML.Data/DataDebuggerPreview.cs | 8 +- src/Microsoft.ML.Data/TrainCatalog.cs | 8 +- .../ColumnConcatenatingTransformer.cs | 52 ++++++------ .../ConversionsExtensionsCatalog.cs | 28 +++---- .../Transforms/ExtensionsCatalog.cs | 14 ++-- src/Microsoft.ML.Data/Transforms/Hashing.cs | 46 +++++----- .../Transforms/KeyToVector.cs | 24 +++--- .../Transforms/NormalizeColumn.cs | 32 +++---- .../Transforms/NormalizeColumnDbl.cs | 20 ++--- .../Transforms/NormalizeColumnSng.cs | 20 ++--- .../Transforms/Normalizer.cs | 84 +++++++++---------- .../Transforms/NormalizerCatalog.cs | 6 +- .../Transforms/SlotsDroppingTransformer.cs | 16 ++-- .../Transforms/TypeConverting.cs | 30 +++---- .../Transforms/ValueToKeyMappingEstimator.cs | 10 +-- .../ValueToKeyMappingTransformer.cs | 12 +-- .../FeatureCombiner.cs | 20 ++--- src/Microsoft.ML.FastTree/FastTree.cs | 2 +- .../VectorWhiteningStaticExtensions.cs | 4 +- .../HalLearnersCatalog.cs | 4 +- .../VectorWhitening.cs | 40 ++++----- .../ExtensionsCatalog.cs | 16 ++-- .../ImagePixelExtractor.cs | 36 ++++---- .../ImageResizer.cs | 28 +++---- .../VectorToImageTransform.cs | 38 ++++----- src/Microsoft.ML.PCA/PCACatalog.cs | 2 +- src/Microsoft.ML.PCA/PcaTransformer.cs | 18 ++-- .../CategoricalHashStaticExtensions.cs | 4 +- .../CategoricalStaticExtensions.cs | 4 +- .../ImageTransformsStatic.cs | 18 ++-- .../LdaStaticExtensions.cs | 4 +- .../NormalizerStaticExtensions.cs | 12 +-- .../TextStaticExtensions.cs | 8 +- .../TransformsStatic.cs | 44 +++++----- .../WordEmbeddingsStaticExtensions.cs | 4 +- .../CategoricalCatalog.cs | 6 +- .../ConversionsCatalog.cs | 4 +- .../CountFeatureSelection.cs | 34 ++++---- .../EntryPoints/TextAnalytics.cs | 2 +- .../ExtensionsCatalog.cs | 14 ++-- .../FeatureSelectionCatalog.cs | 6 +- src/Microsoft.ML.Transforms/GcnTransform.cs | 48 +++++------ .../HashJoiningTransform.cs | 18 ++-- .../LearnerFeatureSelection.cs | 6 +- .../MissingValueHandlingTransformer.cs | 14 ++-- .../MissingValueReplacing.cs | 32 +++---- .../MutualInformationFeatureSelection.cs | 8 +- src/Microsoft.ML.Transforms/OneHotEncoding.cs | 14 ++-- .../OneHotHashEncoding.cs | 18 ++-- .../ProjectionCatalog.cs | 6 +- .../RandomFourierFeaturizing.cs | 22 ++--- .../Text/LdaTransform.cs | 34 ++++---- .../Text/NgramHashingTransformer.cs | 30 +++---- .../Text/NgramTransform.cs | 24 +++--- .../Text/StopWordsRemovingTransformer.cs | 26 +++--- .../Text/TextCatalog.cs | 12 +-- .../Text/TextFeaturizingEstimator.cs | 16 ++-- .../Text/WordBagTransform.cs | 12 +-- .../Text/WordEmbeddingsExtractor.cs | 28 +++---- .../Text/WordHashBagProducingTransform.cs | 12 +-- .../Text/WordTokenizing.cs | 24 +++--- .../UngroupTransform.cs | 20 ++--- test/Microsoft.ML.Benchmarks/HashBench.cs | 2 +- .../UnitTests/TestEntryPoints.cs | 10 +-- .../DataPipe/TestDataPipe.cs | 4 +- test/Microsoft.ML.Tests/OnnxConversionTest.cs | 4 +- .../CookbookSamplesDynamicApi.cs | 6 +- .../TensorflowTests.cs | 4 +- test/Microsoft.ML.Tests/TermEstimatorTests.cs | 28 +++---- .../TrainerEstimators/TrainerEstimators.cs | 4 +- .../Transformers/CategoricalHashTests.cs | 34 ++++---- .../Transformers/CategoricalTests.cs | 40 ++++----- .../Transformers/ConcatTests.cs | 4 +- 
.../Transformers/ConvertTests.cs | 46 +++++----- .../Transformers/FeatureSelectionTests.cs | 24 +++--- .../Transformers/HashTests.cs | 34 ++++---- .../KeyToBinaryVectorEstimatorTest.cs | 24 +++--- .../Transformers/KeyToValueTests.cs | 8 +- .../Transformers/KeyToVectorEstimatorTests.cs | 60 ++++++------- .../Transformers/NAIndicatorTests.cs | 6 +- .../Transformers/NAReplaceTests.cs | 24 +++--- .../Transformers/NormalizerTests.cs | 76 ++++++++--------- .../Transformers/RffTests.cs | 8 +- .../Transformers/TextFeaturizerTests.cs | 2 +- .../Transformers/ValueMappingTests.cs | 10 +-- .../Transformers/WordTokenizeTests.cs | 8 +- 89 files changed, 842 insertions(+), 842 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index 05e04976bd..b5397fa70a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -66,7 +66,7 @@ public static void Example() // Composing a different pipeline if we wanted to normalize more than one column at a time. // Using log scale as the normalization mode. - var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new SimpleColumnInfo[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") }); + var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new ColumnOptions[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") }); // The transformed data. var multiColtransformer = multiColPipeline.Fit(trainData); var multiColtransformedData = multiColtransformer.Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index 54185fdfb2..b42b7a157c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -70,7 +70,7 @@ public static void Example() }; var engine = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text") - .Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new SimpleColumnInfo[] { ("VariableLenghtFeatures", "TokenizedWords") })) + .Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new ColumnOptions[] { ("VariableLenghtFeatures", "TokenizedWords") })) .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize")) .Append(mlContext.Transforms.ScoreTensorFlowModel(modelInfo, new[] { "Prediction/Softmax" }, new[] { "Features" })) .Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax"))) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnInfo.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnInfo.cs index a1b4c3b829..ce8eb5385c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnInfo.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnInfo.cs @@ -5,7 +5,7 @@ namespace Microsoft.ML.Samples.Dynamic { - public sealed class VectorWhitenWithColumnInfo + public sealed class VectorWhitenWithColumnOptions { /// This example requires installation of additional nuget package Microsoft.ML.HalLearners. 
public static void Example() @@ -39,7 +39,7 @@ public static void Example() // A pipeline to project Features column into white noise vector. - var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(new Transforms.Projections.VectorWhiteningEstimator.ColumnInfo( + var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(new Transforms.Projections.VectorWhiteningEstimator.ColumnOptions( nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.Projections.WhiteningKind.Pca, pcaNum: 4)); // The transformed (projected) data. var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData); diff --git a/src/Microsoft.ML.Data/DataDebuggerPreview.cs b/src/Microsoft.ML.Data/DataDebuggerPreview.cs index 2a8210bc7c..988f4af06a 100644 --- a/src/Microsoft.ML.Data/DataDebuggerPreview.cs +++ b/src/Microsoft.ML.Data/DataDebuggerPreview.cs @@ -22,7 +22,7 @@ internal static class Defaults } public DataViewSchema Schema { get; } - public ImmutableArray ColumnView { get; } + public ImmutableArray ColumnView { get; } public ImmutableArray RowView { get; } internal DataDebuggerPreview(IDataView data, int maxRows = Defaults.MaxRows) @@ -56,7 +56,7 @@ internal DataDebuggerPreview(IDataView data, int maxRows = Defaults.MaxRows) } } RowView = rows.ToImmutableArray(); - ColumnView = Enumerable.Range(0, n).Select(c => new ColumnInfo(data.Schema[c], columns[c].ToArray())).ToImmutableArray(); + ColumnView = Enumerable.Range(0, n).Select(c => new ColumnOptions(data.Schema[c], columns[c].ToArray())).ToImmutableArray(); } public override string ToString() @@ -94,14 +94,14 @@ internal RowInfo(int n) } } - public sealed class ColumnInfo + public sealed class ColumnOptions { public DataViewSchema.Column Column { get; } public object[] Values { get; } public override string ToString() => $"{Column.Name}: {Column.Type}"; - internal ColumnInfo(DataViewSchema.Column column, object[] values) + internal ColumnOptions(DataViewSchema.Column column, object[] values) { Column = column; Values = values; diff --git a/src/Microsoft.ML.Data/TrainCatalog.cs b/src/Microsoft.ML.Data/TrainCatalog.cs index 243c71eff0..c3b7f7bd99 100644 --- a/src/Microsoft.ML.Data/TrainCatalog.cs +++ b/src/Microsoft.ML.Data/TrainCatalog.cs @@ -226,12 +226,12 @@ private void EnsureGroupPreservationColumn(ref IDataView data, ref string sampli // Generate a new column with the hashed samplingKeyColumn. 
while (data.Schema.TryGetColumnIndex(samplingKeyColumn, out tmp)) samplingKeyColumn = string.Format("{0}_{1:000}", origStratCol, ++inc); - HashingEstimator.ColumnInfo columnInfo; + HashingEstimator.ColumnOptions columnOptions; if (seed.HasValue) - columnInfo = new HashingEstimator.ColumnInfo(samplingKeyColumn, origStratCol, 30, seed.Value); + columnOptions = new HashingEstimator.ColumnOptions(samplingKeyColumn, origStratCol, 30, seed.Value); else - columnInfo = new HashingEstimator.ColumnInfo(samplingKeyColumn, origStratCol, 30); - data = new HashingEstimator(Environment, columnInfo).Fit(data).Transform(data); + columnOptions = new HashingEstimator.ColumnOptions(samplingKeyColumn, origStratCol, 30); + data = new HashingEstimator(Environment, columnOptions).Fit(data).Transform(data); } } } diff --git a/src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs index de016c9ba9..15dd99b6fe 100644 --- a/src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs @@ -134,7 +134,7 @@ internal sealed class TaggedOptions } [BestFriend] - internal sealed class ColumnInfo + internal sealed class ColumnOptions { public readonly string Name; private readonly (string name, string alias)[] _sources; @@ -143,7 +143,7 @@ internal sealed class ColumnInfo /// /// This denotes a concatenation of all into column called . /// - public ColumnInfo(string name, params string[] inputColumnNames) + public ColumnOptions(string name, params string[] inputColumnNames) : this(name, GetPairs(inputColumnNames)) { } @@ -159,7 +159,7 @@ public ColumnInfo(string name, params string[] inputColumnNames) /// For each input column, an 'alias' can be specified, to be used in constructing the resulting slot names. /// If the alias is not specified, it defaults to be column name. /// - public ColumnInfo(string name, IEnumerable<(string name, string alias)> inputColumnNames) + public ColumnOptions(string name, IEnumerable<(string name, string alias)> inputColumnNames) { Contracts.CheckNonEmpty(name, nameof(name)); Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames)); @@ -195,7 +195,7 @@ public void Save(ModelSaveContext ctx) } } - internal ColumnInfo(ModelLoadContext ctx) + internal ColumnOptions(ModelLoadContext ctx) { Contracts.AssertValue(ctx); // *** Binary format *** @@ -218,7 +218,7 @@ internal ColumnInfo(ModelLoadContext ctx) } } - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; /// /// The names of the output and input column pairs for the transformation. @@ -232,14 +232,14 @@ internal ColumnInfo(ModelLoadContext ctx) /// The column types must match, and the output column type is always a vector. /// internal ColumnConcatenatingTransformer(IHostEnvironment env, string outputColumnName, params string[] inputColumnNames) - : this(env, new ColumnInfo(outputColumnName, inputColumnNames)) + : this(env, new ColumnOptions(outputColumnName, inputColumnNames)) { } /// /// Concatenates multiple groups of columns, each group is denoted by one of . 
/// - internal ColumnConcatenatingTransformer(IHostEnvironment env, params ColumnInfo[] columns) : + internal ColumnConcatenatingTransformer(IHostEnvironment env, params ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ColumnConcatenatingTransformer))) { Contracts.CheckValue(columns, nameof(columns)); @@ -272,7 +272,7 @@ private protected override void SaveModel(ModelSaveContext ctx) // *** Binary format *** // int: number of columns // for each column: - // columnInfo + // columnOptions Contracts.Assert(_columns.Length > 0); ctx.Writer.Write(_columns.Length); @@ -293,18 +293,18 @@ private ColumnConcatenatingTransformer(IHostEnvironment env, ModelLoadContext ct // *** Binary format *** // int: number of columns // for each column: - // columnInfo + // columnOptions int n = ctx.Reader.ReadInt32(); Contracts.CheckDecode(n > 0); - _columns = new ColumnInfo[n]; + _columns = new ColumnOptions[n]; for (int i = 0; i < n; i++) - _columns[i] = new ColumnInfo(ctx); + _columns[i] = new ColumnOptions(ctx); } else _columns = LoadLegacy(ctx); } - private ColumnInfo[] LoadLegacy(ModelLoadContext ctx) + private ColumnOptions[] LoadLegacy(ModelLoadContext ctx) { // *** Legacy binary format *** // int: sizeof(Float). @@ -359,9 +359,9 @@ private ColumnInfo[] LoadLegacy(ModelLoadContext ctx) } } - var result = new ColumnInfo[n]; + var result = new ColumnOptions[n]; for (int i = 0; i < n; i++) - result[i] = new ColumnInfo(names[i], + result[i] = new ColumnOptions(names[i], inputs[i].Zip(aliases[i], (name, alias) => (name, alias))); return result; } @@ -380,7 +380,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckUserArg(Utils.Size(options.Columns[i].Source) > 0, nameof(options.Columns)); var cols = options.Columns - .Select(c => new ColumnInfo(c.Name, c.Source)) + .Select(c => new ColumnOptions(c.Name, c.Source)) .ToArray(); var transformer = new ColumnConcatenatingTransformer(env, cols); return transformer.MakeDataTransform(input); @@ -400,7 +400,7 @@ internal static IDataTransform Create(IHostEnvironment env, TaggedOptions option env.CheckUserArg(Utils.Size(options.Columns[i].Source) > 0, nameof(options.Columns)); var cols = options.Columns - .Select(c => new ColumnInfo(c.Name, c.Source.Select(kvp => (kvp.Value, kvp.Key != "" ? kvp.Key : null)))) + .Select(c => new ColumnOptions(c.Name, c.Source.Select(kvp => (kvp.Value, kvp.Key != "" ? 
kvp.Key : null)))) .ToArray(); var transformer = new ColumnConcatenatingTransformer(env, cols); return transformer.MakeDataTransform(input); @@ -526,7 +526,7 @@ private sealed class BoundColumn { public readonly int[] SrcIndices; - private readonly ColumnInfo _columnInfo; + private readonly ColumnOptions _columnOptions; private readonly DataViewType[] _srcTypes; public readonly VectorType OutputType; @@ -542,10 +542,10 @@ private sealed class BoundColumn private readonly DataViewSchema _inputSchema; - public BoundColumn(DataViewSchema inputSchema, ColumnInfo columnInfo, int[] sources, VectorType outputType, + public BoundColumn(DataViewSchema inputSchema, ColumnOptions columnOptions, int[] sources, VectorType outputType, bool isNormalized, bool hasSlotNames, bool hasCategoricals, int slotCount, int catCount) { - _columnInfo = columnInfo; + _columnOptions = columnOptions; SrcIndices = sources; _srcTypes = sources.Select(c => inputSchema[c].Type).ToArray(); @@ -570,7 +570,7 @@ public DataViewSchema.DetachedColumn MakeSchemaColumn() if (_isIdentity) { var inputCol = _inputSchema[SrcIndices[0]]; - return new DataViewSchema.DetachedColumn(_columnInfo.Name, inputCol.Type, inputCol.Annotations); + return new DataViewSchema.DetachedColumn(_columnOptions.Name, inputCol.Type, inputCol.Annotations); } var metadata = new DataViewSchema.Annotations.Builder(); @@ -581,7 +581,7 @@ public DataViewSchema.DetachedColumn MakeSchemaColumn() if (_hasCategoricals) metadata.Add(AnnotationUtils.Kinds.CategoricalSlotRanges, _categoricalRangeType, (ValueGetter>)GetCategoricalSlotRanges); - return new DataViewSchema.DetachedColumn(_columnInfo.Name, OutputType, metadata.ToAnnotations()); + return new DataViewSchema.DetachedColumn(_columnOptions.Name, OutputType, metadata.ToAnnotations()); } private void GetIsNormalized(ref bool value) => value = _isNormalized; @@ -630,9 +630,9 @@ private void GetSlotNames(ref VBuffer> dst) { int colSrc = SrcIndices[i]; var typeSrc = _srcTypes[i]; - Contracts.Assert(_columnInfo.Sources[i].alias != ""); + Contracts.Assert(_columnOptions.Sources[i].alias != ""); var colName = _inputSchema[colSrc].Name; - var nameSrc = _columnInfo.Sources[i].alias ?? colName; + var nameSrc = _columnOptions.Sources[i].alias ?? colName; if (!(typeSrc is VectorType vectorTypeSrc)) { bldr.AddFeature(slot++, nameSrc.AsMemory()); @@ -650,7 +650,7 @@ private void GetSlotNames(ref VBuffer> dst) { inputMetadata.GetValue(AnnotationUtils.Kinds.SlotNames, ref names); sb.Clear(); - if (_columnInfo.Sources[i].alias != colName) + if (_columnOptions.Sources[i].alias != colName) sb.Append(nameSrc).Append("."); int len = sb.Length; foreach (var kvp in names.Items()) @@ -801,7 +801,7 @@ private Delegate MakeGetter(DataViewRow input) public KeyValuePair SavePfaInfo(BoundPfaContext ctx) { Contracts.AssertValue(ctx); - string outName = _columnInfo.Name; + string outName = _columnOptions.Name; if (!OutputType.IsKnownSize) // Do not attempt variable length. 
return new KeyValuePair(outName, null); @@ -809,7 +809,7 @@ public KeyValuePair SavePfaInfo(BoundPfaContext ctx) bool[] srcPrimitive = new bool[SrcIndices.Length]; for (int i = 0; i < SrcIndices.Length; ++i) { - var srcName = _columnInfo.Sources[i].name; + var srcName = _columnOptions.Sources[i].name; if ((srcTokens[i] = ctx.TokenOrNullForName(srcName)) == null) return new KeyValuePair(outName, null); srcPrimitive[i] = _srcTypes[i] is PrimitiveDataViewType; diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 3b6430f2a6..21b6ff4fd2 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -37,7 +37,7 @@ public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms /// /// The transform's catalog. /// Description of dataset columns and how to process them. - public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingEstimator.ColumnInfo[] columns) + public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingEstimator.ColumnOptions[] columns) => new HashingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -56,7 +56,7 @@ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.Convers /// /// The transform's catalog. /// Description of dataset columns and how to process them. - public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingEstimator.ColumnInfo[] columns) + public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingEstimator.ColumnOptions[] columns) => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -86,8 +86,8 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co /// [!code-csharp[KeyToValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)] /// ]]> /// - public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params SimpleColumnInfo[] columns) - => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), SimpleColumnInfo.ConvertToValueTuples(columns)); + public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns) + => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// /// Convert the key types back to their original vectors. @@ -95,7 +95,7 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co /// The categorical transform's catalog. /// The input column to map back to vectors. 
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog, - params KeyToVectorMappingEstimator.ColumnInfo[] columns) + params KeyToVectorMappingEstimator.ColumnOptions[] columns) => new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -148,7 +148,7 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// /// public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, - ValueToKeyMappingEstimator.ColumnInfo[] columns, IDataView keyData = null) + ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView keyData = null) => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData); /// @@ -175,8 +175,8 @@ public static ValueMappingEstimator ValueMap keys, IEnumerable values, - params SimpleColumnInfo[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, SimpleColumnInfo.ConvertToValueTuples(columns)); + params ColumnOptions[] columns) + => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns)); /// /// @@ -201,9 +201,9 @@ public static ValueMappingEstimator ValueMap keys, IEnumerable values, bool treatValuesAsKeyType, - params SimpleColumnInfo[] columns) + params ColumnOptions[] columns) => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, - SimpleColumnInfo.ConvertToValueTuples(columns)); + ColumnOptions.ConvertToValueTuples(columns)); /// /// @@ -229,9 +229,9 @@ public static ValueMappingEstimator ValueMap keys, IEnumerable values, - params SimpleColumnInfo[] columns) + params ColumnOptions[] columns) => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, - SimpleColumnInfo.ConvertToValueTuples(columns)); + ColumnOptions.ConvertToValueTuples(columns)); /// /// @@ -253,8 +253,8 @@ public static ValueMappingEstimator ValueMap public static ValueMappingEstimator ValueMap( this TransformsCatalog.ConversionTransforms catalog, - IDataView lookupMap, string keyColumn, string valueColumn, params SimpleColumnInfo[] columns) + IDataView lookupMap, string keyColumn, string valueColumn, params ColumnOptions[] columns) => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn, valueColumn, - SimpleColumnInfo.ConvertToValueTuples(columns)); + ColumnOptions.ConvertToValueTuples(columns)); } } diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs index e71b7c5988..976f787c20 100644 --- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs @@ -9,24 +9,24 @@ namespace Microsoft.ML { - public sealed class SimpleColumnInfo + public sealed class ColumnOptions { private readonly string _outputColumnName; private readonly string _inputColumnName; - public SimpleColumnInfo(string outputColumnName, string inputColumnName) + public ColumnOptions(string outputColumnName, string inputColumnName) { _outputColumnName = outputColumnName; _inputColumnName = inputColumnName; } - public static implicit operator SimpleColumnInfo((string outputColumnName, string inputColumnName) value) + public static implicit operator ColumnOptions((string outputColumnName, string inputColumnName) value) { - return new SimpleColumnInfo(value.outputColumnName, value.inputColumnName); + return new 
ColumnOptions(value.outputColumnName, value.inputColumnName); } [BestFriend] - internal static (string outputColumnName, string inputColumnName)[] ConvertToValueTuples(SimpleColumnInfo[] infos) + internal static (string outputColumnName, string inputColumnName)[] ConvertToValueTuples(ColumnOptions[] infos) { return infos.Select(info => (info._outputColumnName, info._inputColumnName)).ToArray(); } @@ -66,8 +66,8 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, /// ]]> /// /// - public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params SimpleColumnInfo[] columns) - => new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), SimpleColumnInfo.ConvertToValueTuples(columns)); + public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params ColumnOptions[] columns) + => new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// /// Concatenates columns together. diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 8e6ad4884d..174c5f2e0a 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -132,7 +132,7 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(HashingTransformer).Assembly.FullName); } - private readonly HashingEstimator.ColumnInfo[] _columns; + private readonly HashingEstimator.ColumnOptions[] _columns; private readonly VBuffer>[] _keyValues; private readonly VectorType[] _kvTypes; @@ -143,13 +143,13 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int throw Host.ExceptParam(nameof(inputSchema), HashingEstimator.ExpectedColumnType); } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(HashingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(HashingEstimator.ColumnOptions[] columns) { Contracts.CheckNonEmpty(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); } - private DataViewType GetOutputType(DataViewSchema inputSchema, HashingEstimator.ColumnInfo column) + private DataViewType GetOutputType(DataViewSchema inputSchema, HashingEstimator.ColumnOptions column) { var keyCount = (ulong)1 << column.HashBits; inputSchema.TryGetColumnIndex(column.InputColumnName, out int srcCol); @@ -166,7 +166,7 @@ private DataViewType GetOutputType(DataViewSchema inputSchema, HashingEstimator. /// /// Host Environment. /// Description of dataset columns and how to process them. 
- internal HashingTransformer(IHostEnvironment env, params HashingEstimator.ColumnInfo[] columns) : + internal HashingTransformer(IHostEnvironment env, params HashingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { _columns = columns.ToArray(); @@ -177,7 +177,7 @@ internal HashingTransformer(IHostEnvironment env, params HashingEstimator.Column } } - internal HashingTransformer(IHostEnvironment env, IDataView input, params HashingEstimator.ColumnInfo[] columns) : + internal HashingTransformer(IHostEnvironment env, IDataView input, params HashingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { _columns = columns.ToArray(); @@ -271,9 +271,9 @@ private HashingTransformer(IHost host, ModelLoadContext ctx) : base(host, ctx) { var columnsLength = ColumnPairs.Length; - _columns = new HashingEstimator.ColumnInfo[columnsLength]; + _columns = new HashingEstimator.ColumnOptions[columnsLength]; for (int i = 0; i < columnsLength; i++) - _columns[i] = new HashingEstimator.ColumnInfo(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, ctx); + _columns[i] = new HashingEstimator.ColumnOptions(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, ctx); TextModelHelper.LoadAll(Host, ctx, columnsLength, out _keyValues, out _kvTypes); } @@ -312,12 +312,12 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new HashingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new HashingEstimator.ColumnOptions[options.Columns.Length]; for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; var kind = item.InvertHash ?? options.InvertHash; - cols[i] = new HashingEstimator.ColumnInfo( + cols[i] = new HashingEstimator.ColumnOptions( item.Name, item.Source ?? item.Name, item.HashBits ?? options.HashBits, @@ -843,11 +843,11 @@ private abstract class InvertHashHelper { protected readonly DataViewRow Row; private readonly bool _includeSlot; - private readonly HashingEstimator.ColumnInfo _ex; + private readonly HashingEstimator.ColumnOptions _ex; private readonly DataViewType _srcType; private readonly int _srcCol; - private InvertHashHelper(DataViewRow row, HashingEstimator.ColumnInfo ex) + private InvertHashHelper(DataViewRow row, HashingEstimator.ColumnOptions ex) { Contracts.AssertValue(row); Row = row; @@ -868,7 +868,7 @@ private InvertHashHelper(DataViewRow row, HashingEstimator.ColumnInfo ex) /// The extra column info /// The number of input hashed valuPres to accumulate per output hash value /// A hash getter, built on top of . 
- public static InvertHashHelper Create(DataViewRow row, HashingEstimator.ColumnInfo ex, int invertHashMaxCount, Delegate dstGetter) + public static InvertHashHelper Create(DataViewRow row, HashingEstimator.ColumnOptions ex, int invertHashMaxCount, Delegate dstGetter) { row.Schema.TryGetColumnIndex(ex.InputColumnName, out int srcCol); DataViewType typeSrc = row.Schema[srcCol].Type; @@ -879,7 +879,7 @@ public static InvertHashHelper Create(DataViewRow row, HashingEstimator.ColumnIn t = t.MakeGenericType(itemType.RawType); - var consTypes = new Type[] { typeof(DataViewRow), typeof(HashingEstimator.ColumnInfo), typeof(int), typeof(Delegate) }; + var consTypes = new Type[] { typeof(DataViewRow), typeof(HashingEstimator.ColumnOptions), typeof(int), typeof(Delegate) }; var constructorInfo = t.GetConstructor(consTypes); return (InvertHashHelper)constructorInfo.Invoke(new object[] { row, ex, invertHashMaxCount, dstGetter }); } @@ -956,7 +956,7 @@ private abstract class Impl : InvertHashHelper { protected readonly InvertHashCollector Collector; - protected Impl(DataViewRow row, HashingEstimator.ColumnInfo ex, int invertHashMaxCount) + protected Impl(DataViewRow row, HashingEstimator.ColumnOptions ex, int invertHashMaxCount) : base(row, ex) { Contracts.AssertValue(row); @@ -989,7 +989,7 @@ private sealed class ImplOne : Impl private T _value; private uint _hash; - public ImplOne(DataViewRow row, HashingEstimator.ColumnInfo ex, int invertHashMaxCount, Delegate dstGetter) + public ImplOne(DataViewRow row, HashingEstimator.ColumnOptions ex, int invertHashMaxCount, Delegate dstGetter) : base(row, ex, invertHashMaxCount) { _srcGetter = Row.GetGetter(_srcCol); @@ -1023,7 +1023,7 @@ private sealed class ImplVec : Impl private VBuffer _value; private VBuffer _hash; - public ImplVec(DataViewRow row, HashingEstimator.ColumnInfo ex, int invertHashMaxCount, Delegate dstGetter) + public ImplVec(DataViewRow row, HashingEstimator.ColumnOptions ex, int invertHashMaxCount, Delegate dstGetter) : base(row, ex, invertHashMaxCount) { _srcGetter = Row.GetGetter>(_srcCol); @@ -1057,7 +1057,7 @@ private sealed class ImplVecOrdered : Impl> private VBuffer _value; private VBuffer _hash; - public ImplVecOrdered(DataViewRow row, HashingEstimator.ColumnInfo ex, int invertHashMaxCount, Delegate dstGetter) + public ImplVecOrdered(DataViewRow row, HashingEstimator.ColumnOptions ex, int invertHashMaxCount, Delegate dstGetter) : base(row, ex, invertHashMaxCount) { _srcGetter = Row.GetGetter>(_srcCol); @@ -1124,7 +1124,7 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . @@ -1158,7 +1158,7 @@ public sealed class ColumnInfo /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. 
- public ColumnInfo(string name, + public ColumnOptions(string name, string inputColumnName = null, int hashBits = Defaults.HashBits, uint seed = Defaults.Seed, @@ -1178,7 +1178,7 @@ public ColumnInfo(string name, InvertHash = invertHash; } - internal ColumnInfo(string name, string inputColumnName, ModelLoadContext ctx) + internal ColumnOptions(string name, string inputColumnName, ModelLoadContext ctx) { Name = name; InputColumnName = inputColumnName; @@ -1208,7 +1208,7 @@ internal void Save(ModelSaveContext ctx) } private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; internal static bool IsColumnTypeValid(DataViewType type) { @@ -1233,7 +1233,7 @@ internal static bool IsColumnTypeValid(DataViewType type) /// 0 does not retain any input values. -1 retains all input values mapping to each hash. internal HashingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int hashBits = Defaults.HashBits, int invertHash = Defaults.InvertHash) - : this(env, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, hashBits: hashBits, invertHash: invertHash)) + : this(env, new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName, hashBits: hashBits, invertHash: invertHash)) { } @@ -1243,7 +1243,7 @@ internal HashingEstimator(IHostEnvironment env, string outputColumnName, string /// Host Environment. /// Description of dataset columns and how to process them. [BestFriend] - internal HashingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal HashingEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(HashingEstimator)); diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs index abe6b81f2e..3c9afe60d2 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs @@ -96,10 +96,10 @@ internal sealed class Options private const string RegistrationName = "KeyToVector"; - public IReadOnlyCollection Columns => _columns.AsReadOnly(); - private readonly KeyToVectorMappingEstimator.ColumnInfo[] _columns; + public IReadOnlyCollection Columns => _columns.AsReadOnly(); + private readonly KeyToVectorMappingEstimator.ColumnOptions[] _columns; - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(KeyToVectorMappingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(KeyToVectorMappingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -120,7 +120,7 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].inputColumnName, reason, type.ToString()); } - internal KeyToVectorMappingTransformer(IHostEnvironment env, params KeyToVectorMappingEstimator.ColumnInfo[] columns) : + internal KeyToVectorMappingTransformer(IHostEnvironment env, params KeyToVectorMappingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { _columns = columns.ToArray(); @@ -185,9 +185,9 @@ private KeyToVectorMappingTransformer(IHost host, ModelLoadContext ctx) var bags = new bool[columnsLength]; bags = ctx.Reader.ReadBoolArray(columnsLength); - 
_columns = new KeyToVectorMappingEstimator.ColumnInfo[columnsLength]; + _columns = new KeyToVectorMappingEstimator.ColumnOptions[columnsLength]; for (int i = 0; i < columnsLength; i++) - _columns[i] = new KeyToVectorMappingEstimator.ColumnInfo(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, bags[i]); + _columns[i] = new KeyToVectorMappingEstimator.ColumnOptions(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, bags[i]); } // Factory method for SignatureDataTransform. @@ -198,12 +198,12 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new KeyToVectorMappingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new KeyToVectorMappingEstimator.ColumnOptions[options.Columns.Length]; for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new KeyToVectorMappingEstimator.ColumnInfo( + cols[i] = new KeyToVectorMappingEstimator.ColumnOptions( item.Name, item.Source ?? item.Name, item.Bag ?? options.Bag); @@ -735,7 +735,7 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; @@ -753,7 +753,7 @@ public sealed class ColumnInfo /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. /// Whether to combine multiple indicator vectors into a single bag vector instead of concatenating them. This is only relevant when the input column is a vector. - public ColumnInfo(string name, string inputColumnName = null, bool bag = Defaults.Bag) + public ColumnOptions(string name, string inputColumnName = null, bool bag = Defaults.Bag) { Contracts.CheckNonWhiteSpace(name, nameof(name)); Name = name; @@ -762,13 +762,13 @@ public ColumnInfo(string name, string inputColumnName = null, bool bag = Default } } - internal KeyToVectorMappingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal KeyToVectorMappingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : this(env, new KeyToVectorMappingTransformer(env, columns)) { } internal KeyToVectorMappingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, bool bag = Defaults.Bag) - : this(env, new KeyToVectorMappingTransformer(env, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, bag))) + : this(env, new KeyToVectorMappingTransformer(env, new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName, bag))) { } diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs index 9cf54dce5c..df13b33afe 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs @@ -275,7 +275,7 @@ public static IDataView CreateMinMaxNormalizer(IHostEnvironment env, IDataView i { Contracts.CheckValue(env, nameof(env)); - var normalizer = new NormalizingEstimator(env, new NormalizingEstimator.MinMaxColumn(outputColumnName, inputColumnName ?? outputColumnName)); + var normalizer = new NormalizingEstimator(env, new NormalizingEstimator.MinMaxColumnOptions(outputColumnName, inputColumnName ?? 
outputColumnName)); return normalizer.Fit(input).MakeDataTransform(input); } @@ -289,7 +289,7 @@ internal static IDataTransform Create(IHostEnvironment env, MinMaxArguments args env.CheckValue(args.Columns, nameof(args.Columns)); var columns = args.Columns - .Select(col => new NormalizingEstimator.MinMaxColumn( + .Select(col => new NormalizingEstimator.MinMaxColumnOptions( col.Name, col.Source ?? col.Name, col.MaxTrainingExamples ?? args.MaxTrainingExamples, @@ -307,7 +307,7 @@ internal static IDataTransform Create(IHostEnvironment env, MeanVarArguments arg env.CheckValue(args.Columns, nameof(args.Columns)); var columns = args.Columns - .Select(col => new NormalizingEstimator.MeanVarColumn( + .Select(col => new NormalizingEstimator.MeanVarColumnOptions( col.Name, col.Source ?? col.Name, col.MaxTrainingExamples ?? args.MaxTrainingExamples, @@ -327,7 +327,7 @@ internal static IDataTransform Create(IHostEnvironment env, LogMeanVarArguments env.CheckValue(args.Columns, nameof(args.Columns)); var columns = args.Columns - .Select(col => new NormalizingEstimator.LogMeanVarColumn( + .Select(col => new NormalizingEstimator.LogMeanVarColumnOptions( col.Name, col.Source ?? col.Name, col.MaxTrainingExamples ?? args.MaxTrainingExamples, @@ -347,7 +347,7 @@ internal static IDataTransform Create(IHostEnvironment env, BinArguments args, I env.CheckValue(args.Columns, nameof(args.Columns)); var columns = args.Columns - .Select(col => new NormalizingEstimator.BinningColumn( + .Select(col => new NormalizingEstimator.BinningColumnOptions( col.Name, col.Source ?? col.Name, col.MaxTrainingExamples ?? args.MaxTrainingExamples, @@ -924,14 +924,14 @@ public static IColumnFunctionBuilder CreateBuilder(MinMaxArguments args, IHost h Contracts.AssertValue(host); host.AssertValue(args); - return CreateBuilder(new NormalizingEstimator.MinMaxColumn( + return CreateBuilder(new NormalizingEstimator.MinMaxColumnOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples, args.Columns[icol].FixZero ?? args.FixZero), host, srcIndex, srcType, cursor); } - public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MinMaxColumn column, IHost host, + public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MinMaxColumnOptions column, IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) { if (srcType is NumberDataViewType) @@ -960,7 +960,7 @@ public static IColumnFunctionBuilder CreateBuilder(MeanVarArguments args, IHost Contracts.AssertValue(host); host.AssertValue(args); - return CreateBuilder(new NormalizingEstimator.MeanVarColumn( + return CreateBuilder(new NormalizingEstimator.MeanVarColumnOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, args.Columns[icol].MaxTrainingExamples ?? 
args.MaxTrainingExamples, @@ -968,7 +968,7 @@ public static IColumnFunctionBuilder CreateBuilder(MeanVarArguments args, IHost args.UseCdf), host, srcIndex, srcType, cursor); } - public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MeanVarColumn column, IHost host, + public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MeanVarColumnOptions column, IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) { Contracts.AssertValue(host); @@ -1000,14 +1000,14 @@ public static IColumnFunctionBuilder CreateBuilder(LogMeanVarArguments args, IHo Contracts.AssertValue(host); host.AssertValue(args); - return CreateBuilder(new NormalizingEstimator.LogMeanVarColumn( + return CreateBuilder(new NormalizingEstimator.LogMeanVarColumnOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples, args.UseCdf), host, srcIndex, srcType, cursor); } - public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.LogMeanVarColumn column, IHost host, + public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) { Contracts.AssertValue(host); @@ -1039,7 +1039,7 @@ public static IColumnFunctionBuilder CreateBuilder(BinArguments args, IHost host Contracts.AssertValue(host); host.AssertValue(args); - return CreateBuilder(new NormalizingEstimator.BinningColumn( + return CreateBuilder(new NormalizingEstimator.BinningColumnOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples, @@ -1047,7 +1047,7 @@ public static IColumnFunctionBuilder CreateBuilder(BinArguments args, IHost host args.Columns[icol].NumBins ?? args.NumBins), host, srcIndex, srcType, cursor); } - public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.BinningColumn column, IHost host, + public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.BinningColumnOptions column, IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) { Contracts.AssertValue(host); @@ -1088,7 +1088,7 @@ public static IColumnFunctionBuilder CreateBuilder(SupervisedBinArguments args, host.CheckUserArg(labelColumnType is NumberDataViewType, nameof(args.LabelColumn), "Label column must be a number or a key type"); return CreateBuilder( - new NormalizingEstimator.SupervisedBinningColumn( + new NormalizingEstimator.SupervisedBinningColumOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, args.LabelColumn ?? 
DefaultColumnNames.Label, @@ -1099,14 +1099,14 @@ public static IColumnFunctionBuilder CreateBuilder(SupervisedBinArguments args, host, labelColumnId, srcIndex, srcType, cursor); } - public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.SupervisedBinningColumn column, IHost host, + public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, string labelColumn, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) { int labelColumnId = GetLabelColumnId(host, cursor.Schema, labelColumn); return CreateBuilder(column, host, labelColumnId, srcIndex, srcType, cursor); } - private static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.SupervisedBinningColumn column, IHost host, + private static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int labelColumnId, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) { Contracts.AssertValue(host); diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs index e7c7321700..12a765b1a6 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs @@ -1427,7 +1427,7 @@ private MinMaxOneColumnFunctionBuilder(IHost host, long lim, bool fix, ValueGett { } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumn column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); @@ -1477,7 +1477,7 @@ private MinMaxVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, { } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumn column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); @@ -1539,14 +1539,14 @@ private MeanVarOneColumnFunctionBuilder(IHost host, long lim, bool fix, ValueGet _buffer = new VBuffer(1, new TFloat[1]); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumn column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); return new MeanVarOneColumnFunctionBuilder(host, column.MaxTrainingExamples, column.FixZero, getter, false, column.UseCdf); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumn column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { var lim = column.MaxTrainingExamples; @@ -1613,7 +1613,7 @@ private MeanVarVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, _useCdf = useCdf; } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumn column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder 
Create(NormalizingEstimator.MeanVarColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); @@ -1621,7 +1621,7 @@ public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumn c return new MeanVarVecColumnFunctionBuilder(host, cv, column.MaxTrainingExamples, column.FixZero, getter, false, column.UseCdf); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumn column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { var lim = column.MaxTrainingExamples; @@ -1729,7 +1729,7 @@ private BinOneColumnFunctionBuilder(IHost host, long lim, bool fix, int numBins, _values = new List(); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumn column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { var lim = column.MaxTrainingExamples; @@ -1778,7 +1778,7 @@ private BinVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, int } } - public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumn column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { var lim = column.MaxTrainingExamples; @@ -1862,7 +1862,7 @@ public override IColumnFunction CreateColumnFunction() return BinColumnFunction.Create(Host, binUpperBounds, _fix); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumn column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) + public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) { var lim = column.MaxTrainingExamples; host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); @@ -1902,7 +1902,7 @@ public override IColumnFunction CreateColumnFunction() return BinColumnFunction.Create(Host, binUpperBounds, _fix); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumn column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) + public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) { var lim = column.MaxTrainingExamples; host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs index 1b3222994b..8a5d33f53c 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs @@ -1434,7 +1434,7 @@ private MinMaxOneColumnFunctionBuilder(IHost host, long lim, bool fix, ValueGett { } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumn column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { 
host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); @@ -1484,7 +1484,7 @@ private MinMaxVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, { } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumn column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); @@ -1546,14 +1546,14 @@ private MeanVarOneColumnFunctionBuilder(IHost host, long lim, bool fix, ValueGet _buffer = new VBuffer(1, new TFloat[1]); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumn column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); return new MeanVarOneColumnFunctionBuilder(host, column.MaxTrainingExamples, column.FixZero, getter, false, column.UseCdf); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumn column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { var lim = column.MaxTrainingExamples; @@ -1620,7 +1620,7 @@ private MeanVarVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, _useCdf = useCdf; } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumn column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); @@ -1628,7 +1628,7 @@ public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumn c return new MeanVarVecColumnFunctionBuilder(host, cv, column.MaxTrainingExamples, column.FixZero, getter, false, column.UseCdf); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumn column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { var lim = column.MaxTrainingExamples; @@ -1736,7 +1736,7 @@ private BinOneColumnFunctionBuilder(IHost host, long lim, bool fix, int numBins, _values = new List(); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumn column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { var lim = column.MaxTrainingExamples; @@ -1785,7 +1785,7 @@ private BinVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, int } } - public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumn column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { var lim = column.MaxTrainingExamples; @@ -1870,7 +1870,7 @@ public override IColumnFunction 
CreateColumnFunction() return BinColumnFunction.Create(Host, binUpperBounds, _fix); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumn column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) + public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) { var lim = column.MaxTrainingExamples; host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); @@ -1910,7 +1910,7 @@ public override IColumnFunction CreateColumnFunction() return BinColumnFunction.Create(Host, binUpperBounds, _fix); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumn column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) + public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) { var lim = column.MaxTrainingExamples; host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 9370cfc672..beb3af8004 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -64,13 +64,13 @@ public enum NormalizerMode SupervisedBinning = 4 } - public abstract class ColumnBase + public abstract class ColumnOptionsBase { public readonly string Name; public readonly string InputColumnName; public readonly long MaxTrainingExamples; - private protected ColumnBase(string name, string inputColumnName, long maxTrainingExamples) + private protected ColumnOptionsBase(string name, string inputColumnName, long maxTrainingExamples) { Contracts.CheckNonEmpty(name, nameof(name)); Contracts.CheckNonEmpty(inputColumnName, nameof(inputColumnName)); @@ -83,40 +83,40 @@ private protected ColumnBase(string name, string inputColumnName, long maxTraini internal abstract IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor); - internal static ColumnBase Create(string outputColumnName, string inputColumnName, NormalizerMode mode) + internal static ColumnOptionsBase Create(string outputColumnName, string inputColumnName, NormalizerMode mode) { switch (mode) { case NormalizerMode.MinMax: - return new MinMaxColumn(outputColumnName, inputColumnName); + return new MinMaxColumnOptions(outputColumnName, inputColumnName); case NormalizerMode.MeanVariance: - return new MeanVarColumn(outputColumnName, inputColumnName); + return new MeanVarColumnOptions(outputColumnName, inputColumnName); case NormalizerMode.LogMeanVariance: - return new LogMeanVarColumn(outputColumnName, inputColumnName); + return new LogMeanVarColumnOptions(outputColumnName, inputColumnName); case NormalizerMode.Binning: - return new BinningColumn(outputColumnName, inputColumnName); + return new BinningColumnOptions(outputColumnName, inputColumnName); case NormalizerMode.SupervisedBinning: - return new SupervisedBinningColumn(outputColumnName, inputColumnName); + return new SupervisedBinningColumOptions(outputColumnName, inputColumnName); default: throw Contracts.ExceptParam(nameof(mode), "Unknown normalizer mode"); } } } - public abstract class FixZeroColumnBase : ColumnBase + public abstract class FixZeroColumnOptionsBase : ColumnOptionsBase { public readonly bool 
FixZero; - private protected FixZeroColumnBase(string outputColumnName, string inputColumnName, long maxTrainingExamples, bool fixZero) + private protected FixZeroColumnOptionsBase(string outputColumnName, string inputColumnName, long maxTrainingExamples, bool fixZero) : base(outputColumnName, inputColumnName, maxTrainingExamples) { FixZero = fixZero; } } - public sealed class MinMaxColumn : FixZeroColumnBase + public sealed class MinMaxColumnOptions : FixZeroColumnOptionsBase { - public MinMaxColumn(string outputColumnName, string inputColumnName = null, long maxTrainingExamples = Defaults.MaxTrainingExamples, bool fixZero = Defaults.FixZero) + public MinMaxColumnOptions(string outputColumnName, string inputColumnName = null, long maxTrainingExamples = Defaults.MaxTrainingExamples, bool fixZero = Defaults.FixZero) : base(outputColumnName, inputColumnName ?? outputColumnName, maxTrainingExamples, fixZero) { } @@ -125,11 +125,11 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.MinMaxUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class MeanVarColumn : FixZeroColumnBase + public sealed class MeanVarColumnOptions : FixZeroColumnOptionsBase { public readonly bool UseCdf; - public MeanVarColumn(string outputColumnName, string inputColumnName = null, + public MeanVarColumnOptions(string outputColumnName, string inputColumnName = null, long maxTrainingExamples = Defaults.MaxTrainingExamples, bool fixZero = Defaults.FixZero, bool useCdf = Defaults.MeanVarCdf) : base(outputColumnName, inputColumnName ?? outputColumnName, maxTrainingExamples, fixZero) { @@ -140,11 +140,11 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.MeanVarUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class LogMeanVarColumn : ColumnBase + public sealed class LogMeanVarColumnOptions : ColumnOptionsBase { public readonly bool UseCdf; - public LogMeanVarColumn(string outputColumnName, string inputColumnName = null, + public LogMeanVarColumnOptions(string outputColumnName, string inputColumnName = null, long maxTrainingExamples = Defaults.MaxTrainingExamples, bool useCdf = Defaults.LogMeanVarCdf) : base(outputColumnName, inputColumnName ?? outputColumnName, maxTrainingExamples) { @@ -155,11 +155,11 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.LogMeanVarUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class BinningColumn : FixZeroColumnBase + public sealed class BinningColumnOptions : FixZeroColumnOptionsBase { public readonly int NumBins; - public BinningColumn(string outputColumnName, string inputColumnName = null, + public BinningColumnOptions(string outputColumnName, string inputColumnName = null, long maxTrainingExamples = Defaults.MaxTrainingExamples, bool fixZero = true, int numBins = Defaults.NumBins) : base(outputColumnName, inputColumnName ?? 
outputColumnName, maxTrainingExamples, fixZero) { @@ -170,13 +170,13 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.BinUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class SupervisedBinningColumn : FixZeroColumnBase + public sealed class SupervisedBinningColumOptions : FixZeroColumnOptionsBase { public readonly int NumBins; public readonly string LabelColumn; public readonly int MinBinSize; - public SupervisedBinningColumn(string outputColumnName, string inputColumnName = null, + public SupervisedBinningColumOptions(string outputColumnName, string inputColumnName = null, string labelColumn = DefaultColumnNames.Label, long maxTrainingExamples = Defaults.MaxTrainingExamples, bool fixZero = true, @@ -194,7 +194,7 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D } private readonly IHost _host; - private readonly ColumnBase[] _columns; + private readonly ColumnOptionsBase[] _columns; /// /// Initializes a new instance of . @@ -220,15 +220,15 @@ internal NormalizingEstimator(IHostEnvironment env, NormalizerMode mode, params Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(NormalizingEstimator)); _host.CheckValue(columns, nameof(columns)); - _columns = columns.Select(x => ColumnBase.Create(x.outputColumnName, x.inputColumnName, mode)).ToArray(); + _columns = columns.Select(x => ColumnOptionsBase.Create(x.outputColumnName, x.inputColumnName, mode)).ToArray(); } /// /// Initializes a new instance of . /// /// The private instance of the . - /// An array of defining the inputs to the Normalizer, and their settings. - internal NormalizingEstimator(IHostEnvironment env, params ColumnBase[] columns) + /// An array of defining the inputs to the Normalizer, and their settings. 
+ internal NormalizingEstimator(IHostEnvironment env, params ColumnOptionsBase[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(NormalizingEstimator)); @@ -309,7 +309,7 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(NormalizingTransformer).Assembly.FullName); } - public sealed class ColumnInfo + public sealed class ColumnOptions { public readonly string Name; public readonly string InputColumnName; @@ -317,7 +317,7 @@ public sealed class ColumnInfo internal readonly DataViewType InputType; internal readonly IColumnFunction ColumnFunction; - internal ColumnInfo(string name, string inputColumnName, DataViewType inputType, IColumnFunction columnFunction) + internal ColumnOptions(string name, string inputColumnName, DataViewType inputType, IColumnFunction columnFunction) { Name = name; InputColumnName = inputColumnName; @@ -367,9 +367,9 @@ internal static void SaveType(ModelSaveContext ctx, DataViewType type) private sealed class ColumnFunctionAccessor : IReadOnlyList { - private readonly ImmutableArray _infos; + private readonly ImmutableArray _infos; - public ColumnFunctionAccessor(ImmutableArray infos) + public ColumnFunctionAccessor(ImmutableArray infos) { _infos = infos; } @@ -384,15 +384,15 @@ public ColumnFunctionAccessor(ImmutableArray infos) [BestFriend] internal readonly IReadOnlyList ColumnFunctions; - public readonly ImmutableArray Columns; - private NormalizingTransformer(IHostEnvironment env, ColumnInfo[] columns) + public readonly ImmutableArray Columns; + private NormalizingTransformer(IHostEnvironment env, ColumnOptions[] columns) : base(env.Register(nameof(NormalizingTransformer)), columns.Select(x => (x.Name, x.InputColumnName)).ToArray()) { Columns = ImmutableArray.Create(columns); ColumnFunctions = new ColumnFunctionAccessor(Columns); } - internal static NormalizingTransformer Train(IHostEnvironment env, IDataView data, NormalizingEstimator.ColumnBase[] columns) + internal static NormalizingTransformer Train(IHostEnvironment env, IDataView data, NormalizingEstimator.ColumnOptionsBase[] columns) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(data, nameof(data)); @@ -411,7 +411,7 @@ internal static NormalizingTransformer Train(IHostEnvironment env, IDataView dat srcTypes[i] = data.Schema[srcCols[i]].Type; activeCols.Add(data.Schema[srcCols[i]]); - var supervisedBinColumn = info as NormalizingEstimator.SupervisedBinningColumn; + var supervisedBinColumn = info as NormalizingEstimator.SupervisedBinningColumOptions; if(supervisedBinColumn != null) activeCols.Add(data.Schema[supervisedBinColumn.LabelColumn]); } @@ -461,11 +461,11 @@ internal static NormalizingTransformer Train(IHostEnvironment env, IDataView dat pch.Checkpoint(numRows); - var result = new ColumnInfo[columns.Length]; + var result = new ColumnOptions[columns.Length]; for (int i = 0; i < columns.Length; i++) { var func = functionBuilders[i].CreateColumnFunction(); - result[i] = new ColumnInfo(columns[i].Name, columns[i].InputColumnName, srcTypes[i], func); + result[i] = new ColumnOptions(columns[i].Name, columns[i].InputColumnName, srcTypes[i], func); } return new NormalizingTransformer(env, result); @@ -481,14 +481,14 @@ private NormalizingTransformer(IHost host, ModelLoadContext ctx) // - source type // - separate model for column function - var cols = new ColumnInfo[ColumnPairs.Length]; + var cols = new ColumnOptions[ColumnPairs.Length]; ColumnFunctions = new ColumnFunctionAccessor(Columns); for (int iinfo = 0; iinfo < ColumnPairs.Length; 
iinfo++) { var dir = string.Format("Normalizer_{0:000}", iinfo); - var typeSrc = ColumnInfo.LoadType(ctx); + var typeSrc = ColumnOptions.LoadType(ctx); ctx.LoadModel(Host, out var function, dir, Host, typeSrc); - cols[iinfo] = new ColumnInfo(ColumnPairs[iinfo].outputColumnName, ColumnPairs[iinfo].inputColumnName, typeSrc, function); + cols[iinfo] = new ColumnOptions(ColumnPairs[iinfo].outputColumnName, ColumnPairs[iinfo].inputColumnName, typeSrc, function); } Columns = ImmutableArray.Create(cols); @@ -502,14 +502,14 @@ private NormalizingTransformer(IHost host, ModelLoadContext ctx, IDataView input // // for each added column: // - separate model for column function - var cols = new ColumnInfo[ColumnPairs.Length]; + var cols = new ColumnOptions[ColumnPairs.Length]; ColumnFunctions = new ColumnFunctionAccessor(Columns); for (int iinfo = 0; iinfo < ColumnPairs.Length; iinfo++) { var dir = string.Format("Normalizer_{0:000}", iinfo); var typeSrc = input.Schema[ColumnPairs[iinfo].inputColumnName].Type; ctx.LoadModel(Host, out var function, dir, Host, typeSrc); - cols[iinfo] = new ColumnInfo(ColumnPairs[iinfo].outputColumnName, ColumnPairs[iinfo].inputColumnName, typeSrc, function); + cols[iinfo] = new ColumnOptions(ColumnPairs[iinfo].outputColumnName, ColumnPairs[iinfo].inputColumnName, typeSrc, function); } Columns = ImmutableArray.Create(cols); @@ -555,7 +555,7 @@ private protected override void SaveModel(ModelSaveContext ctx) // Individual normalization models. for (int iinfo = 0; iinfo < Columns.Length; iinfo++) { - ColumnInfo.SaveType(ctx, Columns[iinfo].InputType); + ColumnOptions.SaveType(ctx, Columns[iinfo].InputType); var dir = string.Format("Normalizer_{0:000}", iinfo); ctx.SaveSubModel(dir, Columns[iinfo].ColumnFunction.Save); } @@ -673,7 +673,7 @@ public void SaveAsPfa(BoundPfaContext ctx) ctx.DeclareVar(toDeclare.ToArray()); } - private JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColumnInfo info, JToken srcToken) + private JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColumnOptions info, JToken srcToken) { Contracts.AssertValue(ctx); Contracts.Assert(0 <= iinfo && iinfo < _parent.Columns.Length); @@ -683,7 +683,7 @@ private JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColumnInfo info, JT return info.ColumnFunction.PfaInfo(ctx, srcToken); } - private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColumnInfo info, string srcVariableName, string dstVariableName) + private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColumnOptions info, string srcVariableName, string dstVariableName) { Contracts.AssertValue(ctx); Contracts.Assert(0 <= iinfo && iinfo < _parent.Columns.Length); diff --git a/src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs index 5956d80df3..0c34ba805b 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs @@ -46,8 +46,8 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// public static NormalizingEstimator Normalize(this TransformsCatalog catalog, NormalizingEstimator.NormalizerMode mode, - params SimpleColumnInfo[] columns) - => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), mode, SimpleColumnInfo.ConvertToValueTuples(columns)); + params ColumnOptions[] columns) + => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), mode, ColumnOptions.ConvertToValueTuples(columns)); /// /// Normalize (rescale) columns according to specified custom parameters. 
@@ -55,7 +55,7 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// The transform catalog /// The normalization settings for all the columns public static NormalizingEstimator Normalize(this TransformsCatalog catalog, - params NormalizingEstimator.ColumnBase[] columns) + params NormalizingEstimator.ColumnOptionsBase[] columns) => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Data/Transforms/SlotsDroppingTransformer.cs b/src/Microsoft.ML.Data/Transforms/SlotsDroppingTransformer.cs index 592e5b1826..d1f879622c 100644 --- a/src/Microsoft.ML.Data/Transforms/SlotsDroppingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/SlotsDroppingTransformer.cs @@ -193,7 +193,7 @@ public bool IsValid() /// Describes how the transformer handles one input-output column pair. /// [BestFriend] - internal sealed class ColumnInfo + internal sealed class ColumnOptions { public readonly string Name; public readonly string InputColumnName; @@ -206,7 +206,7 @@ internal sealed class ColumnInfo /// Name of the column to transform. /// If set to , the value of the will be used as source. /// Ranges of indices in the input column to be dropped. Setting max in to null sets max to int.MaxValue. - public ColumnInfo(string name, string inputColumnName = null, params (int min, int? max)[] slots) + public ColumnOptions(string name, string inputColumnName = null, params (int min, int? max)[] slots) { Name = name; Contracts.CheckValue(Name, nameof(Name)); @@ -219,7 +219,7 @@ public ColumnInfo(string name, string inputColumnName = null, params (int min, i Contracts.Assert(min >= 0 && (max == null || min <= max)); } - internal ColumnInfo(Column column) + internal ColumnOptions(Column column) { Name = column.Name; Contracts.CheckValue(Name, nameof(Name)); @@ -261,7 +261,7 @@ private static VersionInfo GetVersionInfo() /// Specifies the lower bound of the range of slots to be dropped. The lower bound is inclusive. /// Specifies the upper bound of the range of slots to be dropped. The upper bound is exclusive. internal SlotsDroppingTransformer(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int min = default, int? max = null) - : this(env, new ColumnInfo(outputColumnName, inputColumnName, (min, max))) + : this(env, new ColumnOptions(outputColumnName, inputColumnName, (min, max))) { } @@ -270,7 +270,7 @@ internal SlotsDroppingTransformer(IHostEnvironment env, string outputColumnName, /// /// The environment to use. /// Specifies the ranges of slots to drop for each column pair. - internal SlotsDroppingTransformer(IHostEnvironment env, params ColumnInfo[] columns) + internal SlotsDroppingTransformer(IHostEnvironment env, params ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { Host.AssertNonEmpty(ColumnPairs); @@ -312,7 +312,7 @@ private static SlotsDroppingTransformer Create(IHostEnvironment env, ModelLoadCo // Factory method for SignatureDataTransform. 
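For context, a minimal sketch of how the renamed per-column option classes are meant to flow through the Normalize overload in the NormalizerCatalog hunk above; the MLContext instance, the column names ("Induced", "Spontaneous"), and the pipeline variable are illustrative assumptions rather than code from this patch, and namespace imports are omitted.
// Illustrative only: per-column normalization settings now use the *ColumnOptions classes.
var mlContext = new MLContext();
var normalizePipeline = mlContext.Transforms.Normalize(
    new NormalizingEstimator.MinMaxColumnOptions("Induced"),
    new NormalizingEstimator.LogMeanVarColumnOptions("LogSpontaneous", "Spontaneous"));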
private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input) { - var columns = options.Columns.Select(column => new ColumnInfo(column)).ToArray(); + var columns = options.Columns.Select(column => new ColumnOptions(column)).ToArray(); return new SlotsDroppingTransformer(env, columns).MakeDataTransform(input); } @@ -376,7 +376,7 @@ private void GetSlotsMinMax(Column col, out int[] slotsMin, out int[] slotsMax) } } - private static void GetSlotsMinMax(ColumnInfo[] columns, out int[][] slotsMin, out int[][] slotsMax) + private static void GetSlotsMinMax(ColumnOptions[] columns, out int[][] slotsMin, out int[][] slotsMax) { slotsMin = new int[columns.Length][]; slotsMax = new int[columns.Length][]; @@ -413,7 +413,7 @@ private static void GetSlotsMinMax(ColumnInfo[] columns, out int[][] slotsMin, o } } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ColumnOptions[] columns) => columns.Select(c => (c.Name, c.InputColumnName ?? c.Name)).ToArray(); private static bool AreRangesValid(int[][] slotsMin, int[][] slotsMax) diff --git a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs index 1f425245b8..95acff6789 100644 --- a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs +++ b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs @@ -171,13 +171,13 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "Convert"; /// - /// A collection of describing the settings of the transformation. + /// A collection of describing the settings of the transformation. /// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + public IReadOnlyCollection Columns => _columns.AsReadOnly(); - private readonly TypeConvertingEstimator.ColumnInfo[] _columns; + private readonly TypeConvertingEstimator.ColumnOptions[] _columns; - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(TypeConvertingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(TypeConvertingEstimator.ColumnOptions[] columns) { Contracts.CheckNonEmpty(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -192,14 +192,14 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair /// The expected type of the converted column. /// New key count if we work with key type. internal TypeConvertingTransformer(IHostEnvironment env, string outputColumnName, DataKind outputKind, string inputColumnName = null, KeyCount outputKeyCount = null) - : this(env, new TypeConvertingEstimator.ColumnInfo(outputColumnName, outputKind, inputColumnName ?? outputColumnName, outputKeyCount)) + : this(env, new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName ?? outputColumnName, outputKeyCount)) { } /// /// Create a that takes multiple pairs of columns. 
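For context, a minimal sketch of constructing the renamed TypeConvertingEstimator.ColumnOptions directly, mirroring the single-column forwarding constructor above; the column names are illustrative assumptions, not values from this patch.
// Illustrative only: a Single-typed conversion of a hypothetical "Label" column,
// matching the (name, outputKind, inputColumnName) constructor shown in this hunk.
var convertColumn = new TypeConvertingEstimator.ColumnOptions("LabelAsSingle", DataKind.Single, "Label");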
/// - internal TypeConvertingTransformer(IHostEnvironment env, params TypeConvertingEstimator.ColumnInfo[] columns) + internal TypeConvertingTransformer(IHostEnvironment env, params TypeConvertingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(TypeConvertingTransformer)), GetColumnPairs(columns)) { _columns = columns.ToArray(); @@ -260,7 +260,7 @@ private TypeConvertingTransformer(IHost host, ModelLoadContext ctx) // if there is a keyCount // ulong: keyCount (0 for unspecified) - _columns = new TypeConvertingEstimator.ColumnInfo[columnsLength]; + _columns = new TypeConvertingEstimator.ColumnOptions[columnsLength]; for (int i = 0; i < columnsLength; i++) { byte b = ctx.Reader.ReadByte(); @@ -289,7 +289,7 @@ private TypeConvertingTransformer(IHost host, ModelLoadContext ctx) keyCount = new KeyCount(count); } - _columns[i] = new TypeConvertingEstimator.ColumnInfo(ColumnPairs[i].outputColumnName, kind.ToDataKind(), ColumnPairs[i].inputColumnName, keyCount); + _columns[i] = new TypeConvertingEstimator.ColumnOptions(ColumnPairs[i].outputColumnName, kind.ToDataKind(), ColumnPairs[i].inputColumnName, keyCount); } } @@ -301,7 +301,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new TypeConvertingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new TypeConvertingEstimator.ColumnOptions[options.Columns.Length]; for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; @@ -337,7 +337,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa { kind = tempResultType.Value; } - cols[i] = new TypeConvertingEstimator.ColumnInfo(item.Name, kind.ToDataKind(), item.Source ?? item.Name, keyCount); + cols[i] = new TypeConvertingEstimator.ColumnOptions(item.Name, kind.ToDataKind(), item.Source ?? item.Name, keyCount); }; return new TypeConvertingTransformer(env, cols).MakeDataTransform(input); } @@ -527,7 +527,7 @@ internal sealed class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . @@ -553,7 +553,7 @@ public sealed class ColumnInfo /// The expected kind of the converted column. /// Name of column to transform. If set to , the value of the will be used as source. /// New key count, if we work with key type. - public ColumnInfo(string name, DataKind outputKind, string inputColumnName, KeyCount outputKeyCount = null) + public ColumnOptions(string name, DataKind outputKind, string inputColumnName, KeyCount outputKeyCount = null) { Name = name; InputColumnName = inputColumnName ?? name; @@ -568,7 +568,7 @@ public ColumnInfo(string name, DataKind outputKind, string inputColumnName, KeyC /// The expected kind of the converted column. /// Name of column to transform. If set to , the value of the will be used as source. /// New key count, if we work with key type. - public ColumnInfo(string name, Type type, string inputColumnName, KeyCount outputKeyCount = null) + public ColumnOptions(string name, Type type, string inputColumnName, KeyCount outputKeyCount = null) { Name = name; InputColumnName = inputColumnName ?? 
name; @@ -589,14 +589,14 @@ public ColumnInfo(string name, Type type, string inputColumnName, KeyCount outpu internal TypeConvertingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, DataKind outputKind = Defaults.DefaultOutputKind) - : this(env, new ColumnInfo(outputColumnName, outputKind, inputColumnName ?? outputColumnName)) + : this(env, new ColumnOptions(outputColumnName, outputKind, inputColumnName ?? outputColumnName)) { } /// /// Create a that takes multiple pairs of columns. /// - internal TypeConvertingEstimator(IHostEnvironment env, params ColumnInfo[] columns) : + internal TypeConvertingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(TypeConvertingEstimator)), new TypeConvertingTransformer(env, columns)) { } diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs index 62458dec65..3055c5b910 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs @@ -32,7 +32,7 @@ public enum SortOrder : byte /// /// Describes how the transformer handles one column pair. /// - public class ColumnInfo + public class ColumnOptions { public readonly string OutputColumnName; public readonly string InputColumnName; @@ -53,7 +53,7 @@ public class ColumnInfo /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). /// List of terms. /// Whether key value metadata should be text, regardless of the actual input type. - public ColumnInfo(string outputColumnName, string inputColumnName = null, + public ColumnOptions(string outputColumnName, string inputColumnName = null, int maxNumKeys = Defaults.MaxNumKeys, SortOrder sort = Defaults.Sort, string[] term = null, @@ -71,7 +71,7 @@ public ColumnInfo(string outputColumnName, string inputColumnName = null, } private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; private readonly IDataView _keyData; /// @@ -84,11 +84,11 @@ public ColumnInfo(string outputColumnName, string inputColumnName = null, /// How items should be ordered when vectorized. If choosen they will be in the order encountered. /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). internal ValueToKeyMappingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int maxNumKeys = Defaults.MaxNumKeys, SortOrder sort = Defaults.Sort) : - this(env, new [] { new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, maxNumKeys, sort) }) + this(env, new [] { new ColumnOptions(outputColumnName, inputColumnName ?? 
outputColumnName, maxNumKeys, sort) }) { } - internal ValueToKeyMappingEstimator(IHostEnvironment env, ColumnInfo[] columns, IDataView keyData = null) + internal ValueToKeyMappingEstimator(IHostEnvironment env, ColumnOptions[] columns, IDataView keyData = null) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(ValueToKeyMappingEstimator)); diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs index 631b2c2b85..9ce236c404 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs @@ -215,7 +215,7 @@ private static VersionInfo GetTermManagerVersionInfo() private readonly bool[] _textMetadata; private const string RegistrationName = "Term"; - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ValueToKeyMappingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ValueToKeyMappingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.OutputColumnName, x.InputColumnName)).ToArray(); @@ -249,12 +249,12 @@ private ColInfo[] CreateInfos(DataViewSchema inputSchema) } internal ValueToKeyMappingTransformer(IHostEnvironment env, IDataView input, - params ValueToKeyMappingEstimator.ColumnInfo[] columns) : + params ValueToKeyMappingEstimator.ColumnOptions[] columns) : this(env, input, columns, null, false) { } internal ValueToKeyMappingTransformer(IHostEnvironment env, IDataView input, - ValueToKeyMappingEstimator.ColumnInfo[] columns, IDataView keyData, bool autoConvert) + ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView keyData, bool autoConvert) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { using (var ch = Host.Start("Training")) @@ -277,7 +277,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new ValueToKeyMappingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new ValueToKeyMappingEstimator.ColumnOptions[options.Columns.Length]; using (var ch = env.Start("ValidateArgs")) { if ((options.Terms != null || !string.IsNullOrEmpty(options.Term)) && @@ -296,7 +296,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa if (!Enum.IsDefined(typeof(ValueToKeyMappingEstimator.SortOrder), sortOrder)) throw env.ExceptUserArg(nameof(options.Sort), "Undefined sorting criteria '{0}' detected for column '{1}'", sortOrder, item.Name); - cols[i] = new ValueToKeyMappingEstimator.ColumnInfo( + cols[i] = new ValueToKeyMappingEstimator.ColumnOptions( item.Name, item.Source ?? item.Name, item.MaxNumTerms ?? options.MaxNumTerms, @@ -514,7 +514,7 @@ private static TermMap CreateTermMapFromData(IHostEnvironment env, IChannel ch, /// This builds the instances per column. 
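For context, a minimal sketch of the renamed ValueToKeyMappingEstimator.ColumnOptions from the estimator file above, using only the defaulted parameters; the column names are illustrative assumptions.
// Illustrative only: map a hypothetical "Category" text column to keys with default settings.
var termColumn = new ValueToKeyMappingEstimator.ColumnOptions("CategoryKey", "Category");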
/// private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] infos, - IDataView keyData, ValueToKeyMappingEstimator.ColumnInfo[] columns, IDataView trainingData, bool autoConvert) + IDataView keyData, ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView trainingData, bool autoConvert) { Contracts.AssertValue(env); env.AssertValue(ch); diff --git a/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs b/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs index 3feff9e0f6..8160ee1e2a 100644 --- a/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs +++ b/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs @@ -58,7 +58,7 @@ public static CommonOutputs.TransformOutput PrepareFeatures(IHostEnvironment env throw ch.Except("No feature columns specified"); var featNames = new HashSet(); var concatNames = new List>(); - List cvt; + List cvt; int errCount; var ktv = ConvertFeatures(feats.ToArray(), featNames, concatNames, ch, out cvt, out errCount); Contracts.Assert(featNames.Count > 0); @@ -84,7 +84,7 @@ public static CommonOutputs.TransformOutput PrepareFeatures(IHostEnvironment env } } - private static IDataView ApplyKeyToVec(List ktv, IDataView viewTrain, IHost host) + private static IDataView ApplyKeyToVec(List ktv, IDataView viewTrain, IHost host) { Contracts.AssertValueOrNull(ktv); Contracts.AssertValue(viewTrain); @@ -107,7 +107,7 @@ private static IDataView ApplyKeyToVec(List new KeyToVectorMappingEstimator.ColumnInfo(c.Name, c.Name)).ToArray()).Transform(viewTrain); + viewTrain = new KeyToVectorMappingTransformer(host, ktv.Select(c => new KeyToVectorMappingEstimator.ColumnOptions(c.Name, c.Name)).ToArray()).Transform(viewTrain); } return viewTrain; } @@ -139,7 +139,7 @@ private static string GetTerms(IDataView data, string colName) return sb.ToString(); } - private static IDataView ApplyConvert(List cvt, IDataView viewTrain, IHostEnvironment env) + private static IDataView ApplyConvert(List cvt, IDataView viewTrain, IHostEnvironment env) { Contracts.AssertValueOrNull(cvt); Contracts.AssertValue(viewTrain); @@ -149,14 +149,14 @@ private static IDataView ApplyConvert(List c return viewTrain; } - private static List ConvertFeatures(IEnumerable feats, HashSet featNames, List> concatNames, IChannel ch, - out List cvt, out int errCount) + private static List ConvertFeatures(IEnumerable feats, HashSet featNames, List> concatNames, IChannel ch, + out List cvt, out int errCount) { Contracts.AssertValue(feats); Contracts.AssertValue(featNames); Contracts.AssertValue(concatNames); Contracts.AssertValue(ch); - List ktv = null; + List ktv = null; cvt = null; errCount = 0; foreach (var col in feats) @@ -174,7 +174,7 @@ private static IDataView ApplyConvert(List c { var colName = GetUniqueName(); concatNames.Add(new KeyValuePair(col.Name, colName)); - Utils.Add(ref ktv, new KeyToVectorMappingEstimator.ColumnInfo(colName, col.Name)); + Utils.Add(ref ktv, new KeyToVectorMappingEstimator.ColumnOptions(colName, col.Name)); continue; } } @@ -185,7 +185,7 @@ private static IDataView ApplyConvert(List c // This happens when the training is done on an XDF and the scoring is done on a data frame. 
var colName = GetUniqueName(); concatNames.Add(new KeyValuePair(col.Name, colName)); - Utils.Add(ref cvt, new TypeConvertingEstimator.ColumnInfo(colName, DataKind.Single, col.Name)); + Utils.Add(ref cvt, new TypeConvertingEstimator.ColumnOptions(colName, DataKind.Single, col.Name)); continue; } } @@ -300,7 +300,7 @@ public static CommonOutputs.TransformOutput PrepareRegressionLabel(IHostEnvironm return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, nop, input.Data), OutputData = nop }; } - var xf = new TypeConvertingTransformer(host, new TypeConvertingEstimator.ColumnInfo(input.LabelColumn, DataKind.Single, input.LabelColumn)).Transform(input.Data); + var xf = new TypeConvertingTransformer(host, new TypeConvertingEstimator.ColumnOptions(input.LabelColumn, DataKind.Single, input.LabelColumn)).Transform(input.Data); return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, xf, input.Data), OutputData = xf }; } } diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index 3a532f2506..240cf6d2cd 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -1373,7 +1373,7 @@ private Dataset Construct(RoleMappedData examples, ref int numExamples, int maxB } // Convert the group column, if one exists. if (examples.Schema.Group?.Name is string groupName) - data = new TypeConvertingTransformer(Host, new TypeConvertingEstimator.ColumnInfo(groupName, DataKind.UInt64, groupName)).Transform(data); + data = new TypeConvertingTransformer(Host, new TypeConvertingEstimator.ColumnOptions(groupName, DataKind.UInt64, groupName)).Transform(data); // Since we've passed it through a few transforms, reconstitute the mapping on the // newly transformed data. 
diff --git a/src/Microsoft.ML.HalLearners.StaticPipe/VectorWhiteningStaticExtensions.cs b/src/Microsoft.ML.HalLearners.StaticPipe/VectorWhiteningStaticExtensions.cs index 1ae13704e0..d67afd4754 100644 --- a/src/Microsoft.ML.HalLearners.StaticPipe/VectorWhiteningStaticExtensions.cs +++ b/src/Microsoft.ML.HalLearners.StaticPipe/VectorWhiteningStaticExtensions.cs @@ -47,9 +47,9 @@ public override IEstimator Reconcile(IHostEnvironment env, { Contracts.Assert(toOutput.Length == 1); - var infos = new VectorWhiteningEstimator.ColumnInfo[toOutput.Length]; + var infos = new VectorWhiteningEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; i++) - infos[i] = new VectorWhiteningEstimator.ColumnInfo(outputNames[toOutput[i]], inputNames[((OutPipelineColumn)toOutput[i]).Input], _kind, _eps, _maxRows, _pcaNum); + infos[i] = new VectorWhiteningEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[((OutPipelineColumn)toOutput[i]).Input], _kind, _eps, _maxRows, _pcaNum); return new VectorWhiteningEstimator(env, infos); } diff --git a/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs b/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs index 87e2851cbc..b92b8cfb8e 100644 --- a/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs +++ b/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs @@ -155,11 +155,11 @@ public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.Proje /// /// /// /// /// - public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, params VectorWhiteningEstimator.ColumnInfo[] columns) + public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, params VectorWhiteningEstimator.ColumnOptions[] columns) => new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), columns); } diff --git a/src/Microsoft.ML.HalLearners/VectorWhitening.cs b/src/Microsoft.ML.HalLearners/VectorWhitening.cs index 0f19730c63..f564dda364 100644 --- a/src/Microsoft.ML.HalLearners/VectorWhitening.cs +++ b/src/Microsoft.ML.HalLearners/VectorWhitening.cs @@ -136,7 +136,7 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(VectorWhiteningTransformer).Assembly.FullName); } - private readonly VectorWhiteningEstimator.ColumnInfo[] _columns; + private readonly VectorWhiteningEstimator.ColumnOptions[] _columns; /// /// Initializes a new object. @@ -145,7 +145,7 @@ private static VersionInfo GetVersionInfo() /// An array of whitening matrices where models[i] is learned from the i-th element of . /// An array of inverse whitening matrices, the i-th element being the inverse matrix of models[i]. /// Describes the parameters of the whitening process for each column pair. 
- internal VectorWhiteningTransformer(IHostEnvironment env, float[][] models, float[][] invModels, params VectorWhiteningEstimator.ColumnInfo[] columns) + internal VectorWhiteningTransformer(IHostEnvironment env, float[][] models, float[][] invModels, params VectorWhiteningEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(VectorWhiteningTransformer)), GetColumnPairs(columns)) { Host.AssertNonEmpty(ColumnPairs); @@ -162,15 +162,15 @@ private VectorWhiteningTransformer(IHostEnvironment env, ModelLoadContext ctx) // *** Binary format *** // // foreach column pair - // ColumnInfo + // ColumnOptions // foreach model // whitening matrix // recovery matrix Host.AssertNonEmpty(ColumnPairs); - _columns = new VectorWhiteningEstimator.ColumnInfo[ColumnPairs.Length]; + _columns = new VectorWhiteningEstimator.ColumnOptions[ColumnPairs.Length]; for (int i = 0; i < _columns.Length; i++) - _columns[i] = new VectorWhiteningEstimator.ColumnInfo(ctx); + _columns[i] = new VectorWhiteningEstimator.ColumnOptions(ctx); _models = new float[ColumnPairs.Length][]; _invModels = new float[ColumnPairs.Length][]; @@ -193,7 +193,7 @@ internal static VectorWhiteningTransformer Create(IHostEnvironment env, ModelLoa // Factory method for SignatureDataTransform. internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input) { - var infos = options.Columns.Select(colPair => new VectorWhiteningEstimator.ColumnInfo(colPair, options)).ToArray(); + var infos = options.Columns.Select(colPair => new VectorWhiteningEstimator.ColumnOptions(colPair, options)).ToArray(); (var models, var invModels) = TrainVectorWhiteningTransform(env, input, infos); return new VectorWhiteningTransformer(env, models, invModels, infos).MakeDataTransform(input); } @@ -206,7 +206,7 @@ internal static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx internal static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema) => Create(env, ctx).MakeRowMapper(inputSchema); - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(VectorWhiteningEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(VectorWhiteningEstimator.ColumnOptions[] columns) => columns.Select(c => (c.Name, c.InputColumnName ?? c.Name)).ToArray(); private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) @@ -243,7 +243,7 @@ private static void ValidateModel(IExceptionContext ectx, float[] model, DataVie // Sometime GetRowCount doesn't really return the number of rows in the associated IDataView. // A more reliable solution is to turely iterate through all rows via a RowCursor. - private static long GetRowCount(IDataView inputData, params VectorWhiteningEstimator.ColumnInfo[] columns) + private static long GetRowCount(IDataView inputData, params VectorWhiteningEstimator.ColumnOptions[] columns) { long? rows = inputData.GetRowCount(); if (rows != null) @@ -260,7 +260,7 @@ private static long GetRowCount(IDataView inputData, params VectorWhiteningEstim } // Computes the transformation matrices needed for whitening process from training data. 
- internal static (float[][] models, float[][] invModels) TrainVectorWhiteningTransform(IHostEnvironment env, IDataView inputData, params VectorWhiteningEstimator.ColumnInfo[] columns) + internal static (float[][] models, float[][] invModels) TrainVectorWhiteningTransform(IHostEnvironment env, IDataView inputData, params VectorWhiteningEstimator.ColumnOptions[] columns) { var models = new float[columns.Length][]; var invModels = new float[columns.Length][]; @@ -276,7 +276,7 @@ internal static (float[][] models, float[][] invModels) TrainVectorWhiteningTran } // Extracts the indices and types of the input columns to the whitening transform. - private static void GetColTypesAndIndex(IHostEnvironment env, IDataView inputData, VectorWhiteningEstimator.ColumnInfo[] columns, out DataViewType[] srcTypes, out int[] cols) + private static void GetColTypesAndIndex(IHostEnvironment env, IDataView inputData, VectorWhiteningEstimator.ColumnOptions[] columns, out DataViewType[] srcTypes, out int[] cols) { cols = new int[columns.Length]; srcTypes = new DataViewType[columns.Length]; @@ -298,7 +298,7 @@ private static void GetColTypesAndIndex(IHostEnvironment env, IDataView inputDat // Loads all relevant data for whitening training into memory. private static float[][] LoadDataAsDense(IHostEnvironment env, IChannel ch, IDataView inputData, out int[] actualRowCounts, - DataViewType[] srcTypes, int[] cols, params VectorWhiteningEstimator.ColumnInfo[] columns) + DataViewType[] srcTypes, int[] cols, params VectorWhiteningEstimator.ColumnOptions[] columns) { long crowData = GetRowCount(inputData, columns); @@ -365,7 +365,7 @@ private static float[][] LoadDataAsDense(IHostEnvironment env, IChannel ch, IDat // will have dimension input_vec_size x input_vec_size. In the getter, the matrix will be truncated to only keep // PcaNum columns, and thus produce the desired output size. private static void TrainModels(IHostEnvironment env, IChannel ch, float[][] columnData, int[] rowCounts, - ref float[][] models, ref float[][] invModels, DataViewType[] srcTypes, params VectorWhiteningEstimator.ColumnInfo[] columns) + ref float[][] models, ref float[][] invModels, DataViewType[] srcTypes, params VectorWhiteningEstimator.ColumnOptions[] columns) { ch.Assert(columnData.Length == rowCounts.Length); @@ -471,7 +471,7 @@ private protected override void SaveModel(ModelSaveContext ctx) // *** Binary format *** // // foreach column pair - // ColumnInfo + // ColumnOptions // foreach model // whitening matrix // recovery matrix @@ -683,7 +683,7 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . @@ -720,7 +720,7 @@ public sealed class ColumnInfo /// Whitening constant, prevents division by zero. /// Maximum number of rows used to train the transform. /// In case of PCA whitening, indicates the number of components to retain. 
- public ColumnInfo(string name, string inputColumnName = null, WhiteningKind kind = Defaults.Kind, float eps = Defaults.Eps, + public ColumnOptions(string name, string inputColumnName = null, WhiteningKind kind = Defaults.Kind, float eps = Defaults.Eps, int maxRows = Defaults.MaxRows, int pcaNum = Defaults.PcaNum) { Name = name; @@ -738,7 +738,7 @@ public ColumnInfo(string name, string inputColumnName = null, WhiteningKind kind Contracts.CheckUserArg(PcaNum >= 0, nameof(PcaNum)); } - internal ColumnInfo(VectorWhiteningTransformer.Column item, VectorWhiteningTransformer.Options options) + internal ColumnOptions(VectorWhiteningTransformer.Column item, VectorWhiteningTransformer.Options options) { Name = item.Name; Contracts.CheckValue(Name, nameof(Name)); @@ -755,7 +755,7 @@ internal ColumnInfo(VectorWhiteningTransformer.Column item, VectorWhiteningTrans Contracts.CheckUserArg(PcaNum >= 0, nameof(item.PcaNum)); } - internal ColumnInfo(ModelLoadContext ctx) + internal ColumnOptions(ModelLoadContext ctx) { Contracts.AssertValue(ctx); @@ -799,12 +799,12 @@ internal void Save(ModelSaveContext ctx) } private readonly IHost _host; - private readonly ColumnInfo[] _infos; + private readonly ColumnOptions[] _infos; /// /// The environment. /// Describes the parameters of the whitening process for each column pair. - internal VectorWhiteningEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal VectorWhiteningEstimator(IHostEnvironment env, params ColumnOptions[] columns) { _host = Contracts.CheckRef(env, nameof(env)).Register(nameof(VectorWhiteningEstimator)); _infos = columns; @@ -823,7 +823,7 @@ internal VectorWhiteningEstimator(IHostEnvironment env, string outputColumnName, float eps = Defaults.Eps, int maxRows = Defaults.MaxRows, int pcaNum = Defaults.PcaNum) - : this(env, new ColumnInfo(outputColumnName, inputColumnName, kind, eps, maxRows, pcaNum)) + : this(env, new ColumnOptions(outputColumnName, inputColumnName, kind, eps, maxRows, pcaNum)) { } diff --git a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs index 7f1512d2f4..da85b0e39b 100644 --- a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs @@ -18,8 +18,8 @@ public static class ImageEstimatorsCatalog /// [!code-csharp[ConvertToGrayscale](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ImageAnalytics/ConvertToGrayscale.cs)] /// ]]> /// - public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params SimpleColumnInfo[] columnPairs) - => new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), SimpleColumnInfo.ConvertToValueTuples(columnPairs)); + public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params ColumnOptions[] columnPairs) + => new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columnPairs)); /// /// Loads the images from the into memory. 
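For context, a minimal sketch of the VectorWhiten catalog overload (from HalLearnersCatalog.cs above) with the renamed VectorWhiteningEstimator.ColumnOptions; the MLContext instance, the "Features" column name, and the Transforms.Projection catalog path are assumptions based on this hunk rather than verified against the full tree, and namespace imports are omitted.
// Illustrative only: whiten a hypothetical "Features" vector column with default settings.
var mlContext = new MLContext();
var whitenPipeline = mlContext.Transforms.Projection.VectorWhiten(
    new VectorWhiteningEstimator.ColumnOptions("Features"));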
@@ -41,8 +41,8 @@ public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalo /// [!code-csharp[LoadImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ImageAnalytics/LoadImages.cs)] /// ]]> /// - public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params SimpleColumnInfo[] columnPairs) - => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, SimpleColumnInfo.ConvertToValueTuples(columnPairs)); + public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params ColumnOptions[] columnPairs) + => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, ColumnOptions.ConvertToValueTuples(columnPairs)); /// /// The transform's catalog. @@ -75,7 +75,7 @@ public static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog /// /// The transform's catalog. /// The name of the columns containing the images, and per-column configurations. - public static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog catalog, params ImagePixelExtractingEstimator.ColumnInfo[] columns) + public static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog catalog, params ImagePixelExtractingEstimator.ColumnOptions[] columns) => new ImagePixelExtractingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -121,7 +121,7 @@ public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog /// Those pre-trained models have a defined width and height for their input images, so often, after getting loaded, the images will need to get resized before /// further processing. /// The new width and height, as well as other properties of resizing, like type of scaling (uniform, or non-uniform), and whether to pad the image, - /// or just crop it can be specified separately for each column loaded, through the . + /// or just crop it can be specified separately for each column loaded, through the . /// /// /// @@ -133,7 +133,7 @@ public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog /// [!code-csharp[ResizeImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ImageAnalytics/ResizeImages.cs)] /// ]]> /// - public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog, params ImageResizingEstimator.ColumnInfo[] columns) + public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog, params ImageResizingEstimator.ColumnOptions[] columns) => new ImageResizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -141,7 +141,7 @@ public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog /// /// The transform's catalog. /// The name of the columns containing the pixels, and per-column configurations. 
- public static VectorToImageConvertingEstimator ConvertToImage(this TransformsCatalog catalog, params VectorToImageConvertingEstimator.ColumnInfo[] columns) + public static VectorToImageConvertingEstimator ConvertToImage(this TransformsCatalog catalog, params VectorToImageConvertingEstimator.ColumnOptions[] columns) => new VectorToImageConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs index bc7b0c2359..8342f3a139 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs @@ -37,7 +37,7 @@ namespace Microsoft.ML.ImageAnalytics /// /// During the transformation, the columns of are converted them into a vector representing the image pixels /// than can be further used as features by the algorithms added to the pipeline. - /// + /// /// /// /// @@ -150,12 +150,12 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "ImagePixelExtractor"; - private readonly ImagePixelExtractingEstimator.ColumnInfo[] _columns; + private readonly ImagePixelExtractingEstimator.ColumnOptions[] _columns; /// /// The columns passed to this . /// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + public IReadOnlyCollection Columns => _columns.AsReadOnly(); /// /// Extract pixels values from image and produce array of values. @@ -179,7 +179,7 @@ internal ImagePixelExtractingTransformer(IHostEnvironment env, float offset = ImagePixelExtractingEstimator.Defaults.Offset, float scale = ImagePixelExtractingEstimator.Defaults.Scale, bool asFloat = ImagePixelExtractingEstimator.Defaults.Convert) - : this(env, new ImagePixelExtractingEstimator.ColumnInfo(outputColumnName, inputColumnName, colors, order, interleave, offset, scale, asFloat)) + : this(env, new ImagePixelExtractingEstimator.ColumnOptions(outputColumnName, inputColumnName, colors, order, interleave, offset, scale, asFloat)) { } @@ -188,13 +188,13 @@ internal ImagePixelExtractingTransformer(IHostEnvironment env, /// /// The host environment. /// Describes the parameters of pixel extraction for each column pair. 
- internal ImagePixelExtractingTransformer(IHostEnvironment env, params ImagePixelExtractingEstimator.ColumnInfo[] columns) + internal ImagePixelExtractingTransformer(IHostEnvironment env, params ImagePixelExtractingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { _columns = columns.ToArray(); } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ImagePixelExtractingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ImagePixelExtractingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -209,11 +209,11 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(options.Columns, nameof(options.Columns)); - var columns = new ImagePixelExtractingEstimator.ColumnInfo[options.Columns.Length]; + var columns = new ImagePixelExtractingEstimator.ColumnOptions[options.Columns.Length]; for (int i = 0; i < columns.Length; i++) { var item = options.Columns[i]; - columns[i] = new ImagePixelExtractingEstimator.ColumnInfo(item, options); + columns[i] = new ImagePixelExtractingEstimator.ColumnOptions(item, options); } var transformer = new ImagePixelExtractingTransformer(env, columns); @@ -238,11 +238,11 @@ private ImagePixelExtractingTransformer(IHost host, ModelLoadContext ctx) // // for each added column - // ColumnInfo + // ColumnOptions - _columns = new ImagePixelExtractingEstimator.ColumnInfo[ColumnPairs.Length]; + _columns = new ImagePixelExtractingEstimator.ColumnOptions[ColumnPairs.Length]; for (int i = 0; i < _columns.Length; i++) - _columns[i] = new ImagePixelExtractingEstimator.ColumnInfo(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, ctx); + _columns[i] = new ImagePixelExtractingEstimator.ColumnOptions(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, ctx); } // Factory method for SignatureLoadDataTransform. @@ -264,7 +264,7 @@ private protected override void SaveModel(ModelSaveContext ctx) // // for each added column - // ColumnInfo + // ColumnOptions base.SaveColumns(ctx); @@ -485,7 +485,7 @@ private VectorType[] ConstructTypes() /// /// /// Calling in this estimator, produces an . - /// + /// /// /// /// @@ -564,7 +564,7 @@ internal static void GetOrder(ColorsOrder order, ColorBits colors, out int a, ou /// /// Describes how the transformer handles one image pixel extraction column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; @@ -595,7 +595,7 @@ public sealed class ColumnInfo internal readonly byte Planes; - internal ColumnInfo(ImagePixelExtractingTransformer.Column item, ImagePixelExtractingTransformer.Options options) + internal ColumnOptions(ImagePixelExtractingTransformer.Column item, ImagePixelExtractingTransformer.Options options) { Contracts.CheckValue(item, nameof(item)); Contracts.CheckValue(options, nameof(options)); @@ -638,7 +638,7 @@ internal ColumnInfo(ImagePixelExtractingTransformer.Column item, ImagePixelExtra /// Offset color pixel value by this amount. Applied to color value first. /// Scale color pixel value by this amount. Applied to color value second. /// Output array as float array. If false, output as byte array and ignores and . 
- public ColumnInfo(string name, + public ColumnOptions(string name, string inputColumnName = null, ColorBits colors = Defaults.Colors, ColorsOrder order = Defaults.Order, @@ -676,7 +676,7 @@ public ColumnInfo(string name, Contracts.CheckParam(FloatUtils.IsFiniteNonZero(Scale), nameof(scale)); } - internal ColumnInfo(string name, string inputColumnName, ModelLoadContext ctx) + internal ColumnOptions(string name, string inputColumnName, ModelLoadContext ctx) { Contracts.AssertNonEmpty(name); Contracts.AssertNonEmpty(inputColumnName); @@ -784,7 +784,7 @@ internal ImagePixelExtractingEstimator(IHostEnvironment env, /// /// The host environment. /// Describes the parameters of pixel extraction for each column pair. - internal ImagePixelExtractingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal ImagePixelExtractingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImagePixelExtractingEstimator)), new ImagePixelExtractingTransformer(env, columns)) { } diff --git a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs index 9862678eb8..a40e5951ef 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs @@ -37,7 +37,7 @@ namespace Microsoft.ML.ImageAnalytics /// /// /// Calling resizes the images to a new height and width. - /// + /// /// /// /// @@ -115,12 +115,12 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "ImageScaler"; - private readonly ImageResizingEstimator.ColumnInfo[] _columns; + private readonly ImageResizingEstimator.ColumnOptions[] _columns; /// /// The columns passed to this . /// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + public IReadOnlyCollection Columns => _columns.AsReadOnly(); /// /// Resize image. @@ -136,7 +136,7 @@ internal ImageResizingTransformer(IHostEnvironment env, string outputColumnName, int imageWidth, int imageHeight, string inputColumnName = null, ImageResizingEstimator.ResizingKind resizing = ImageResizingEstimator.ResizingKind.IsoCrop, ImageResizingEstimator.Anchor cropAnchor = ImageResizingEstimator.Anchor.Center) - : this(env, new ImageResizingEstimator.ColumnInfo(outputColumnName, imageWidth, imageHeight, inputColumnName, resizing, cropAnchor)) + : this(env, new ImageResizingEstimator.ColumnOptions(outputColumnName, imageWidth, imageHeight, inputColumnName, resizing, cropAnchor)) { } @@ -145,13 +145,13 @@ internal ImageResizingTransformer(IHostEnvironment env, string outputColumnName, /// /// The host environment. /// Describes the parameters of image resizing for each column pair. 
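For context, a minimal sketch of ExtractPixels with the renamed ImagePixelExtractingEstimator.ColumnOptions (the params overload appears in ExtensionsCatalog.cs above), relying on the constructor defaults; the column names are illustrative assumptions and namespace imports are omitted.
// Illustrative only: extract pixel values from a hypothetical "ImageResized" column into "Pixels".
var mlContext = new MLContext();
var pixelPipeline = mlContext.Transforms.ExtractPixels(
    new ImagePixelExtractingEstimator.ColumnOptions("Pixels", "ImageResized"));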
- internal ImageResizingTransformer(IHostEnvironment env, params ImageResizingEstimator.ColumnInfo[] columns) + internal ImageResizingTransformer(IHostEnvironment env, params ImageResizingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { _columns = columns.ToArray(); } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ImageResizingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ImageResizingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -166,11 +166,11 @@ internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDat env.CheckValue(args.Columns, nameof(args.Columns)); - var cols = new ImageResizingEstimator.ColumnInfo[args.Columns.Length]; + var cols = new ImageResizingEstimator.ColumnOptions[args.Columns.Length]; for (int i = 0; i < cols.Length; i++) { var item = args.Columns[i]; - cols[i] = new ImageResizingEstimator.ColumnInfo( + cols[i] = new ImageResizingEstimator.ColumnOptions( item.Name, item.ImageWidth ?? args.ImageWidth, item.ImageHeight ?? args.ImageHeight, @@ -206,7 +206,7 @@ private ImageResizingTransformer(IHost host, ModelLoadContext ctx) // byte: scaling kind // byte: anchor - _columns = new ImageResizingEstimator.ColumnInfo[ColumnPairs.Length]; + _columns = new ImageResizingEstimator.ColumnOptions[ColumnPairs.Length]; for (int i = 0; i < ColumnPairs.Length; i++) { int width = ctx.Reader.ReadInt32(); @@ -217,7 +217,7 @@ private ImageResizingTransformer(IHost host, ModelLoadContext ctx) Host.CheckDecode(Enum.IsDefined(typeof(ImageResizingEstimator.ResizingKind), scale)); var anchor = (ImageResizingEstimator.Anchor)ctx.Reader.ReadByte(); Host.CheckDecode(Enum.IsDefined(typeof(ImageResizingEstimator.Anchor), anchor)); - _columns[i] = new ImageResizingEstimator.ColumnInfo(ColumnPairs[i].outputColumnName, width, height, ColumnPairs[i].inputColumnName, scale, anchor); + _columns[i] = new ImageResizingEstimator.ColumnOptions(ColumnPairs[i].outputColumnName, width, height, ColumnPairs[i].inputColumnName, scale, anchor); } } @@ -410,7 +410,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func /// /// Calling in this estimator, produces an . - /// + /// /// /// /// @@ -462,7 +462,7 @@ public enum Anchor : byte /// /// Describes how the transformer handles one image resize column. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// Name of the column resulting from the transformation of public readonly string Name; @@ -494,7 +494,7 @@ public sealed class ColumnInfo /// Name of column to transform. If set to , the value of the will be used as source. /// What to use. /// If set to what anchor to use for cropping. - public ColumnInfo(string name, + public ColumnOptions(string name, int width, int height, string inputColumnName = null, @@ -543,7 +543,7 @@ internal ImageResizingEstimator(IHostEnvironment env, /// /// The host environment. /// Describes the parameters of image resizing for each column pair. 
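For context, a minimal sketch of ResizeImages with the renamed ImageResizingEstimator.ColumnOptions constructor shown above; the 224x224 size and the column names are illustrative assumptions and namespace imports are omitted.
// Illustrative only: resize a hypothetical "ImageLoaded" column to 224x224 into "ImageResized".
var mlContext = new MLContext();
var resizePipeline = mlContext.Transforms.ResizeImages(
    new ImageResizingEstimator.ColumnOptions("ImageResized", 224, 224, "ImageLoaded"));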
- internal ImageResizingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal ImageResizingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : this(env, new ImageResizingTransformer(env, columns)) { } diff --git a/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs b/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs index c18c3c4811..86a8041ad7 100644 --- a/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs @@ -34,7 +34,7 @@ namespace Microsoft.ML.ImageAnalytics /// produced by fitting the to an . /// /// - /// + /// /// /// /// @@ -175,14 +175,14 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "VectorToImageConverter"; - private readonly VectorToImageConvertingEstimator.ColumnInfo[] _columns; + private readonly VectorToImageConvertingEstimator.ColumnOptions[] _columns; /// /// The columns passed to this . /// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + public IReadOnlyCollection Columns => _columns.AsReadOnly(); - internal VectorToImageConvertingTransformer(IHostEnvironment env, params VectorToImageConvertingEstimator.ColumnInfo[] columns) + internal VectorToImageConvertingTransformer(IHostEnvironment env, params VectorToImageConvertingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { Host.AssertNonEmpty(columns); @@ -217,7 +217,7 @@ internal VectorToImageConvertingTransformer(IHostEnvironment env, string outputC int defaultRed = VectorToImageConvertingEstimator.Defaults.DefaultRed, int defaultGreen = VectorToImageConvertingEstimator.Defaults.DefaultGreen, int defaultBlue = VectorToImageConvertingEstimator.Defaults.DefaultBlue) - : this(env, new VectorToImageConvertingEstimator.ColumnInfo(outputColumnName, height, width, inputColumnName, colors, order, interleave, scale, offset, defaultAlpha, defaultRed, defaultGreen, defaultBlue)) + : this(env, new VectorToImageConvertingEstimator.ColumnOptions(outputColumnName, height, width, inputColumnName, colors, order, interleave, scale, offset, defaultAlpha, defaultRed, defaultGreen, defaultBlue)) { } @@ -230,11 +230,11 @@ internal static IDataTransform Create(IHostEnvironment env, Options args, IDataV env.CheckValue(args.Columns, nameof(args.Columns)); - var columns = new VectorToImageConvertingEstimator.ColumnInfo[args.Columns.Length]; + var columns = new VectorToImageConvertingEstimator.ColumnOptions[args.Columns.Length]; for (int i = 0; i < columns.Length; i++) { var item = args.Columns[i]; - columns[i] = new VectorToImageConvertingEstimator.ColumnInfo(item, args); + columns[i] = new VectorToImageConvertingEstimator.ColumnOptions(item, args); } var transformer = new VectorToImageConvertingTransformer(env, columns); @@ -249,11 +249,11 @@ private VectorToImageConvertingTransformer(IHost host, ModelLoadContext ctx) // *** Binary format *** // // foreach added column - // ColumnInfo + // ColumnOptions - _columns = new VectorToImageConvertingEstimator.ColumnInfo[ColumnPairs.Length]; + _columns = new VectorToImageConvertingEstimator.ColumnOptions[ColumnPairs.Length]; for (int i = 0; i < _columns.Length; i++) - _columns[i] = new VectorToImageConvertingEstimator.ColumnInfo(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, ctx); + _columns[i] = new VectorToImageConvertingEstimator.ColumnOptions(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, ctx); } private static 
VectorToImageConvertingTransformer Create(IHostEnvironment env, ModelLoadContext ctx) @@ -295,7 +295,7 @@ private protected override void SaveModel(ModelSaveContext ctx) _columns[i].Save(ctx); } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(VectorToImageConvertingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(VectorToImageConvertingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -350,7 +350,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func GetterFromType(PrimitiveDataViewType srcType, DataViewRow input, int iinfo, - VectorToImageConvertingEstimator.ColumnInfo ex, bool needScale) where TValue : IConvertible + VectorToImageConvertingEstimator.ColumnOptions ex, bool needScale) where TValue : IConvertible { Contracts.Assert(typeof(TValue) == srcType.RawType); var getSrc = RowCursorUtils.GetVecGetterAs(srcType, input, ColMapNewToOld[iinfo]); @@ -421,7 +421,7 @@ private ValueGetter GetterFromType(PrimitiveDataViewType srcType }; } - private static ImageType[] ConstructTypes(VectorToImageConvertingEstimator.ColumnInfo[] columns) + private static ImageType[] ConstructTypes(VectorToImageConvertingEstimator.ColumnOptions[] columns) { return columns.Select(c => new ImageType(c.Height, c.Width)).ToArray(); } @@ -433,7 +433,7 @@ private static ImageType[] ConstructTypes(VectorToImageConvertingEstimator.Colum /// /// /// Calling in this estimator, produces an . - /// + /// /// /// /// @@ -451,7 +451,7 @@ internal static class Defaults /// /// Describes how the transformer handles one image pixel extraction column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; @@ -479,7 +479,7 @@ public sealed class ColumnInfo public bool Green => (Colors & ImagePixelExtractingEstimator.ColorBits.Green) != 0; public bool Blue => (Colors & ImagePixelExtractingEstimator.ColorBits.Blue) != 0; - internal ColumnInfo(VectorToImageConvertingTransformer.Column item, VectorToImageConvertingTransformer.Options args) + internal ColumnOptions(VectorToImageConvertingTransformer.Column item, VectorToImageConvertingTransformer.Options args) { Contracts.CheckValue(item, nameof(item)); Contracts.CheckValue(args, nameof(args)); @@ -508,7 +508,7 @@ internal ColumnInfo(VectorToImageConvertingTransformer.Column item, VectorToImag Contracts.CheckUserArg(FloatUtils.IsFiniteNonZero(Scale), nameof(item.Scale)); } - internal ColumnInfo(string outputColumnName, string inputColumnName, ModelLoadContext ctx) + internal ColumnOptions(string outputColumnName, string inputColumnName, ModelLoadContext ctx) { Contracts.AssertNonEmpty(outputColumnName); Contracts.AssertNonEmpty(inputColumnName); @@ -588,7 +588,7 @@ internal ColumnInfo(string outputColumnName, string inputColumnName, ModelLoadCo /// Default value for red color, would be overriden if contains . /// Default value for grenn color, would be overriden if contains . /// Default value for blue color, would be overriden if contains . 
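A caller-side sketch of the renamed VectorToImageConvertingEstimator.ColumnOptions whose public constructor continues below; note the height-before-width parameter order. The column names and the 32x32 size are assumptions:

    // Height comes before width in this constructor.
    var toImageColumn = new VectorToImageConvertingEstimator.ColumnOptions("ImageOut", 32, 32, "Pixels");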
- public ColumnInfo(string name, + public ColumnOptions(string name, int height, int width, string inputColumnName = null, ImagePixelExtractingEstimator.ColorBits colors = ImagePixelExtractingEstimator.Defaults.Colors, @@ -717,7 +717,7 @@ internal VectorToImageConvertingEstimator(IHostEnvironment env, /// /// The host environment. /// Describes the parameters of pixel extraction for each column pair. - internal VectorToImageConvertingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal VectorToImageConvertingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(VectorToImageConvertingEstimator)), new VectorToImageConvertingTransformer(env, columns)) { } diff --git a/src/Microsoft.ML.PCA/PCACatalog.cs b/src/Microsoft.ML.PCA/PCACatalog.cs index ed2e9c63ef..d410122077 100644 --- a/src/Microsoft.ML.PCA/PCACatalog.cs +++ b/src/Microsoft.ML.PCA/PCACatalog.cs @@ -34,7 +34,7 @@ public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(t /// Initializes a new instance of . /// The transform's catalog. /// Input columns to apply PrincipalComponentAnalysis on. - public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(this TransformsCatalog.ProjectionTransforms catalog, params PrincipalComponentAnalysisEstimator.ColumnInfo[] columns) + public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(this TransformsCatalog.ProjectionTransforms catalog, params PrincipalComponentAnalysisEstimator.ColumnOptions[] columns) => new PrincipalComponentAnalysisEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// diff --git a/src/Microsoft.ML.PCA/PcaTransformer.cs b/src/Microsoft.ML.PCA/PcaTransformer.cs index 644a581317..fcab7e2b7c 100644 --- a/src/Microsoft.ML.PCA/PcaTransformer.cs +++ b/src/Microsoft.ML.PCA/PcaTransformer.cs @@ -201,7 +201,7 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "Pca"; - internal PrincipalComponentAnalysisTransformer(IHostEnvironment env, IDataView input, PrincipalComponentAnalysisEstimator.ColumnInfo[] columns) + internal PrincipalComponentAnalysisTransformer(IHostEnvironment env, IDataView input, PrincipalComponentAnalysisEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(PrincipalComponentAnalysisTransformer)), GetColumnPairs(columns)) { Host.AssertNonEmpty(ColumnPairs); @@ -251,7 +251,7 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat env.CheckValue(options, nameof(options)); env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = options.Columns.Select(item => new PrincipalComponentAnalysisEstimator.ColumnInfo( + var cols = options.Columns.Select(item => new PrincipalComponentAnalysisEstimator.ColumnOptions( item.Name, item.Source, item.WeightColumn, @@ -291,13 +291,13 @@ private protected override void SaveModel(ModelSaveContext ctx) for (int i = 0; i < _transformInfos.Length; i++) _transformInfos[i].Save(ctx); } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(PrincipalComponentAnalysisEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(PrincipalComponentAnalysisEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); } - private void 
Train(PrincipalComponentAnalysisEstimator.ColumnInfo[] columns, TransformInfo[] transformInfos, IDataView trainingData) + private void Train(PrincipalComponentAnalysisEstimator.ColumnOptions[] columns, TransformInfo[] transformInfos, IDataView trainingData) { var y = new float[_numColumns][][]; var omega = new float[_numColumns][][]; @@ -631,7 +631,7 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . @@ -673,7 +673,7 @@ public sealed class ColumnInfo /// Oversampling parameter for randomized PCA training. /// If enabled, data is centered to be zero mean. /// The random seed. If unspecified random state will be instead derived from the . - public ColumnInfo(string name, + public ColumnOptions(string name, string inputColumnName = null, string weightColumn = Defaults.WeightColumn, int rank = Defaults.Rank, @@ -694,7 +694,7 @@ public ColumnInfo(string name, } private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; /// /// The environment to use. @@ -712,14 +712,14 @@ internal PrincipalComponentAnalysisEstimator(IHostEnvironment env, string weightColumn = Defaults.WeightColumn, int rank = Defaults.Rank, int overSampling = Defaults.Oversampling, bool center = Defaults.Center, int? seed = null) - : this(env, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, weightColumn, rank, overSampling, center, seed)) + : this(env, new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName, weightColumn, rank, overSampling, center, seed)) { } /// /// The environment to use. /// The dataset columns to use, and their specific settings. 
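Taken together with the ProjectToPrincipalComponents overload above, the renamed PCA options would be used roughly as follows; mlContext, the column names, and the rank are assumptions, and the projection catalog is assumed to be exposed as Transforms.Projection:

    // "mlContext" is an assumed MLContext instance; "Features"/"FeaturesPca" are illustrative column names.
    var pca = mlContext.Transforms.Projection.ProjectToPrincipalComponents(
        new PrincipalComponentAnalysisEstimator.ColumnOptions("FeaturesPca", "Features", rank: 5));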
- internal PrincipalComponentAnalysisEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal PrincipalComponentAnalysisEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(PrincipalComponentAnalysisEstimator)); diff --git a/src/Microsoft.ML.StaticPipe/CategoricalHashStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/CategoricalHashStaticExtensions.cs index c4e54be4f0..a80f91a208 100644 --- a/src/Microsoft.ML.StaticPipe/CategoricalHashStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/CategoricalHashStaticExtensions.cs @@ -99,11 +99,11 @@ private sealed class Rec : EstimatorReconciler public override IEstimator Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new OneHotHashEncodingEstimator.ColumnInfo[toOutput.Length]; + var infos = new OneHotHashEncodingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var tcol = (ICategoricalCol)toOutput[i]; - infos[i] = new OneHotHashEncodingEstimator.ColumnInfo(outputNames[toOutput[i]], inputNames[tcol.Input], (OneHotEncodingTransformer.OutputKind)tcol.Config.OutputKind, + infos[i] = new OneHotHashEncodingEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[tcol.Input], (OneHotEncodingTransformer.OutputKind)tcol.Config.OutputKind, tcol.Config.HashBits, tcol.Config.Seed, tcol.Config.Ordered, tcol.Config.InvertHash); } return new OneHotHashEncodingEstimator(env, infos); diff --git a/src/Microsoft.ML.StaticPipe/CategoricalStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/CategoricalStaticExtensions.cs index 2e12179687..e6691f0674 100644 --- a/src/Microsoft.ML.StaticPipe/CategoricalStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/CategoricalStaticExtensions.cs @@ -108,12 +108,12 @@ private sealed class Rec : EstimatorReconciler public override IEstimator Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new OneHotEncodingEstimator.ColumnInfo[toOutput.Length]; + var infos = new OneHotEncodingEstimator.ColumnOptions[toOutput.Length]; Action onFit = null; for (int i = 0; i < toOutput.Length; ++i) { var tcol = (ICategoricalCol)toOutput[i]; - infos[i] = new OneHotEncodingEstimator.ColumnInfo(outputNames[toOutput[i]], inputNames[tcol.Input], (OneHotEncodingTransformer.OutputKind)tcol.Config.OutputKind, + infos[i] = new OneHotEncodingEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[tcol.Input], (OneHotEncodingTransformer.OutputKind)tcol.Config.OutputKind, tcol.Config.Max, (ValueToKeyMappingEstimator.SortOrder)tcol.Config.Order); if (tcol.Config.OnFit != null) { diff --git a/src/Microsoft.ML.StaticPipe/ImageTransformsStatic.cs b/src/Microsoft.ML.StaticPipe/ImageTransformsStatic.cs index 5814aa1618..7727648162 100644 --- a/src/Microsoft.ML.StaticPipe/ImageTransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/ImageTransformsStatic.cs @@ -140,8 +140,8 @@ public OutPipelineColumn(PipelineColumn input, int width, int height, _cropAnchor = cropAnchor; } - private ImageResizingEstimator.ColumnInfo MakeColumnInfo(string outputColumnName, string inputColumnName) - => new ImageResizingEstimator.ColumnInfo(outputColumnName, _width, _height, inputColumnName, _resizing, _cropAnchor); + private ImageResizingEstimator.ColumnOptions MakeColumnOptions(string outputColumnName, string 
inputColumnName) + => new ImageResizingEstimator.ColumnOptions(outputColumnName, _width, _height, inputColumnName, _resizing, _cropAnchor); /// /// Reconciler to an for the . @@ -162,11 +162,11 @@ public override IEstimator Reconcile(IHostEnvironment env, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var cols = new ImageResizingEstimator.ColumnInfo[toOutput.Length]; + var cols = new ImageResizingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var outCol = (OutPipelineColumn)toOutput[i]; - cols[i] = outCol.MakeColumnInfo(outputNames[outCol], inputNames[outCol._input]); + cols[i] = outCol.MakeColumnOptions(outputNames[outCol], inputNames[outCol._input]); } return new ImageResizingEstimator(env, cols); } @@ -180,7 +180,7 @@ private interface IColInput { Custom Input { get; } - ImagePixelExtractingEstimator.ColumnInfo MakeColumnInfo(string outputColumnName, string inputColumnName); + ImagePixelExtractingEstimator.ColumnOptions MakeColumnOptions(string outputColumnName, string inputColumnName); } internal sealed class OutPipelineColumn : Vector, IColInput @@ -198,7 +198,7 @@ public OutPipelineColumn(Custom input, ImagePixelExtractingTransformer.C _colParam = col; } - public ImagePixelExtractingEstimator.ColumnInfo MakeColumnInfo(string outputColumnName, string inputColumnName) + public ImagePixelExtractingEstimator.ColumnOptions MakeColumnOptions(string outputColumnName, string inputColumnName) { // In principle, the analyzer should only call the the reconciler once for these columns. Contracts.Assert(_colParam.Source == null); @@ -206,7 +206,7 @@ public ImagePixelExtractingEstimator.ColumnInfo MakeColumnInfo(string outputColu _colParam.Name = outputColumnName; _colParam.Source = inputColumnName; - return new ImagePixelExtractingEstimator.ColumnInfo(_colParam, _defaultArgs); + return new ImagePixelExtractingEstimator.ColumnOptions(_colParam, _defaultArgs); } } @@ -231,11 +231,11 @@ public override IEstimator Reconcile(IHostEnvironment env, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var cols = new ImagePixelExtractingEstimator.ColumnInfo[toOutput.Length]; + var cols = new ImagePixelExtractingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var outCol = (IColInput)toOutput[i]; - cols[i] = outCol.MakeColumnInfo(outputNames[toOutput[i]], inputNames[outCol.Input]); + cols[i] = outCol.MakeColumnOptions(outputNames[toOutput[i]], inputNames[outCol.Input]); } return new ImagePixelExtractingEstimator(env, cols); } diff --git a/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs index 59219c74a4..6c4f6ec36d 100644 --- a/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs @@ -99,13 +99,13 @@ public override IEstimator Reconcile(IHostEnvironment env, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new LatentDirichletAllocationEstimator.ColumnInfo[toOutput.Length]; + var infos = new LatentDirichletAllocationEstimator.ColumnOptions[toOutput.Length]; Action onFit = null; for (int i = 0; i < toOutput.Length; ++i) { var tcol = (ILdaCol)toOutput[i]; - infos[i] = new LatentDirichletAllocationEstimator.ColumnInfo(outputNames[toOutput[i]], + infos[i] = new LatentDirichletAllocationEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[tcol.Input], tcol.Config.NumTopic, tcol.Config.AlphaSum, diff --git 
a/src/Microsoft.ML.StaticPipe/NormalizerStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/NormalizerStaticExtensions.cs index b3b3ed20a0..41b7e4f3cc 100644 --- a/src/Microsoft.ML.StaticPipe/NormalizerStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/NormalizerStaticExtensions.cs @@ -70,7 +70,7 @@ private static NormVector NormalizeByMinMaxCore(Vector input, bool fixZ { Contracts.CheckValue(input, nameof(input)); Contracts.CheckParam(maxTrainingExamples > 1, nameof(maxTrainingExamples), "Must be greater than 1"); - return new Impl(input, (name, src) => new NormalizingEstimator.MinMaxColumn(name, src, maxTrainingExamples, fixZero), AffineMapper(onFit)); + return new Impl(input, (name, src) => new NormalizingEstimator.MinMaxColumnOptions(name, src, maxTrainingExamples, fixZero), AffineMapper(onFit)); } // We have a slightly different breaking up of categories of normalizers versus the dynamic API. Both the mean-var and @@ -172,8 +172,8 @@ private static NormVector NormalizeByMVCdfCore(Vector input, bool fixZe return new Impl(input, (name, src) => { if (useLog) - return new NormalizingEstimator.LogMeanVarColumn(name, src, maxTrainingExamples, useCdf); - return new NormalizingEstimator.MeanVarColumn(name, src, maxTrainingExamples, fixZero, useCdf); + return new NormalizingEstimator.LogMeanVarColumnOptions(name, src, maxTrainingExamples, useCdf); + return new NormalizingEstimator.MeanVarColumnOptions(name, src, maxTrainingExamples, fixZero, useCdf); }, onFit); } @@ -233,7 +233,7 @@ private static NormVector NormalizeByBinningCore(Vector input, int numB Contracts.CheckValue(input, nameof(input)); Contracts.CheckParam(numBins > 1, nameof(maxTrainingExamples), "Must be greater than 1"); Contracts.CheckParam(maxTrainingExamples > 1, nameof(maxTrainingExamples), "Must be greater than 1"); - return new Impl(input, (name, src) => new NormalizingEstimator.BinningColumn(name, src, maxTrainingExamples, fixZero, numBins), BinMapper(onFit)); + return new Impl(input, (name, src) => new NormalizingEstimator.BinningColumnOptions(name, src, maxTrainingExamples, fixZero, numBins), BinMapper(onFit)); } /// @@ -269,7 +269,7 @@ private static NormVector NormalizeByBinningCore(Vector input, int numB public delegate void OnFitBinned(ImmutableArray upperBounds); #region Implementation support - private delegate NormalizingEstimator.ColumnBase CreateNormCol(string outputColumnName, string inputColumnName); + private delegate NormalizingEstimator.ColumnOptionsBase CreateNormCol(string outputColumnName, string inputColumnName); private sealed class Rec : EstimatorReconciler { @@ -279,7 +279,7 @@ private sealed class Rec : EstimatorReconciler public override IEstimator Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var cols = new NormalizingEstimator.ColumnBase[toOutput.Length]; + var cols = new NormalizingEstimator.ColumnOptionsBase[toOutput.Length]; List<(int idx, Action onFit)> onFits = null; for (int i = 0; i < toOutput.Length; ++i) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index 1a04a24d5b..e8b34ac44e 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -149,9 +149,9 @@ public override IEstimator Reconcile(IHostEnvironment env, { Contracts.Assert(toOutput.Length == 1); - var columns = new List(); + var columns = new List(); foreach (var outCol in toOutput) 
- columns.Add(new StopWordsRemovingEstimator.ColumnInfo(outputNames[outCol], inputNames[((OutPipelineColumn)outCol).Input], _language)); + columns.Add(new StopWordsRemovingEstimator.ColumnOptions(outputNames[outCol], inputNames[((OutPipelineColumn)outCol).Input], _language)); return new StopWordsRemovingEstimator(env, columns.ToArray()); } @@ -557,9 +557,9 @@ public override IEstimator Reconcile(IHostEnvironment env, IReadOnlyCollection usedNames) { Contracts.Assert(toOutput.Length == 1); - var columns = new List(); + var columns = new List(); foreach (var outCol in toOutput) - columns.Add(new NgramHashingEstimator.ColumnInfo(outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] }, + columns.Add(new NgramHashingEstimator.ColumnOptions(outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] }, _ngramLength, _skipLength, _allLengths, _hashBits, _seed, _ordered, _invertHash)); return new NgramHashingEstimator(env, columns.ToArray()); diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs index 5a201d3d54..8f37b7084f 100644 --- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs @@ -266,9 +266,9 @@ public override IEstimator Reconcile(IHostEnvironment env, { Contracts.Assert(toOutput.Length == 1); - var infos = new CountFeatureSelectingEstimator.ColumnInfo[toOutput.Length]; + var infos = new CountFeatureSelectingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; i++) - infos[i] = new CountFeatureSelectingEstimator.ColumnInfo(outputNames[toOutput[i]], inputNames[((OutPipelineColumn)toOutput[i]).Input], _count); + infos[i] = new CountFeatureSelectingEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[((OutPipelineColumn)toOutput[i]).Input], _count); return new CountFeatureSelectingEstimator(env, infos); } @@ -576,11 +576,11 @@ public override IEstimator Reconcile(IHostEnvironment env, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new KeyToVectorMappingEstimator.ColumnInfo[toOutput.Length]; + var infos = new KeyToVectorMappingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var col = (IColInput)toOutput[i]; - infos[i] = new KeyToVectorMappingEstimator.ColumnInfo(outputNames[toOutput[i]], inputNames[col.Input], col.Bag); + infos[i] = new KeyToVectorMappingEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[col.Input], col.Bag); } return new KeyToVectorMappingEstimator(env, infos); } @@ -733,9 +733,9 @@ public static class NAReplacerStaticExtensions private readonly struct Config { public readonly bool ImputeBySlot; - public readonly MissingValueReplacingEstimator.ColumnInfo.ReplacementMode ReplacementMode; + public readonly MissingValueReplacingEstimator.ColumnOptions.ReplacementMode ReplacementMode; - public Config(MissingValueReplacingEstimator.ColumnInfo.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, + public Config(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { ImputeBySlot = imputeBySlot; @@ -801,11 +801,11 @@ public override IEstimator Reconcile(IHostEnvironment env, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new MissingValueReplacingEstimator.ColumnInfo[toOutput.Length]; + var infos = new 
MissingValueReplacingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var col = (IColInput)toOutput[i]; - infos[i] = new MissingValueReplacingEstimator.ColumnInfo(outputNames[toOutput[i]], inputNames[col.Input], col.Config.ReplacementMode, col.Config.ImputeBySlot); + infos[i] = new MissingValueReplacingEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[col.Input], col.Config.ReplacementMode, col.Config.ImputeBySlot); } return new MissingValueReplacingEstimator(env, infos); } @@ -816,7 +816,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// Incoming data. /// How NaN should be replaced - public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ColumnInfo.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) { Contracts.CheckValue(input, nameof(input)); return new OutScalar(input, new Config(replacementMode, false)); @@ -827,7 +827,7 @@ public static Scalar ReplaceNaNValues(this Scalar input, MissingVa /// /// Incoming data. /// How NaN should be replaced - public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ColumnInfo.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) { Contracts.CheckValue(input, nameof(input)); return new OutScalar(input, new Config(replacementMode, false)); @@ -840,7 +840,7 @@ public static Scalar ReplaceNaNValues(this Scalar input, Missing /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. - public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ColumnInfo.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) + public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { Contracts.CheckValue(input, nameof(input)); return new OutVectorColumn(input, new Config(replacementMode, imputeBySlot)); @@ -854,7 +854,7 @@ public static Vector ReplaceNaNValues(this Vector input, MissingVa /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. 
- public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ColumnInfo.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) + public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { Contracts.CheckValue(input, nameof(input)); return new OutVectorColumn(input, new Config(replacementMode, imputeBySlot)); @@ -865,7 +865,7 @@ public static Vector ReplaceNaNValues(this Vector input, Missing /// /// Incoming data. /// How NaN should be replaced - public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ColumnInfo.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) { Contracts.CheckValue(input, nameof(input)); return new OutVarVectorColumn(input, new Config(replacementMode, false)); @@ -875,7 +875,7 @@ public static VarVector ReplaceNaNValues(this VarVector input, Mis /// /// Incoming data. /// How NaN should be replaced - public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ColumnInfo.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) { Contracts.CheckValue(input, nameof(input)); return new OutVarVectorColumn(input, new Config(replacementMode, false)); @@ -931,11 +931,11 @@ private sealed class Rec : EstimatorReconciler public override IEstimator Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new TypeConvertingEstimator.ColumnInfo[toOutput.Length]; + var infos = new TypeConvertingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var tcol = (IConvertCol)toOutput[i]; - infos[i] = new TypeConvertingEstimator.ColumnInfo(outputNames[toOutput[i]], tcol.Kind.ToDataKind(), inputNames[tcol.Input]); + infos[i] = new TypeConvertingEstimator.ColumnOptions(outputNames[toOutput[i]], tcol.Kind.ToDataKind(), inputNames[tcol.Input]); } return new TypeConvertingEstimator(env, infos); } @@ -1023,12 +1023,12 @@ public override IEstimator Reconcile(IHostEnvironment env, Pipelin IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new ValueToKeyMappingEstimator.ColumnInfo[toOutput.Length]; + var infos = new ValueToKeyMappingEstimator.ColumnOptions[toOutput.Length]; Action onFit = null; for (int i = 0; i < toOutput.Length; ++i) { var tcol = (ITermCol)toOutput[i]; - infos[i] = new ValueToKeyMappingEstimator.ColumnInfo(outputNames[toOutput[i]], inputNames[tcol.Input], + infos[i] = new ValueToKeyMappingEstimator.ColumnOptions(outputNames[toOutput[i]], inputNames[tcol.Input], tcol.Config.Max, (ValueToKeyMappingEstimator.SortOrder)tcol.Config.Order); if (tcol.Config.OnFit != null) { @@ -1593,11 +1593,11 @@ private sealed class Reconciler : 
EstimatorReconciler public override IEstimator Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new RandomFourierFeaturizingEstimator.ColumnInfo[toOutput.Length]; + var infos = new RandomFourierFeaturizingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var tcol = (IColInput)toOutput[i]; - infos[i] = new RandomFourierFeaturizingEstimator.ColumnInfo(outputNames[toOutput[i]], tcol.Config.NewDim, tcol.Config.UseSin, inputNames[tcol.Input], tcol.Config.Generator, tcol.Config.Seed); + infos[i] = new RandomFourierFeaturizingEstimator.ColumnOptions(outputNames[toOutput[i]], tcol.Config.NewDim, tcol.Config.UseSin, inputNames[tcol.Input], tcol.Config.Generator, tcol.Config.Seed); } return new RandomFourierFeaturizingEstimator(env, infos); } @@ -1638,11 +1638,11 @@ public OutPipelineColumn(Vector input, string weightColumn, int rank, private sealed class Reconciler : EstimatorReconciler { - private readonly PrincipalComponentAnalysisEstimator.ColumnInfo _colInfo; + private readonly PrincipalComponentAnalysisEstimator.ColumnOptions _colInfo; public Reconciler(string weightColumn, int rank, int overSampling, bool center, int? seed = null) { - _colInfo = new PrincipalComponentAnalysisEstimator.ColumnInfo( + _colInfo = new PrincipalComponentAnalysisEstimator.ColumnOptions( null, null, weightColumn, rank, overSampling, center, seed); } diff --git a/src/Microsoft.ML.StaticPipe/WordEmbeddingsStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/WordEmbeddingsStaticExtensions.cs index a79c2de4d1..7f6d9fde60 100644 --- a/src/Microsoft.ML.StaticPipe/WordEmbeddingsStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/WordEmbeddingsStaticExtensions.cs @@ -70,11 +70,11 @@ public override IEstimator Reconcile(IHostEnvironment env, { Contracts.Assert(toOutput.Length == 1); - var cols = new WordEmbeddingsExtractingEstimator.ColumnInfo[toOutput.Length]; + var cols = new WordEmbeddingsExtractingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var outCol = (OutColumn)toOutput[i]; - cols[i] = new WordEmbeddingsExtractingEstimator.ColumnInfo(outputNames[outCol], inputNames[outCol.Input]); + cols[i] = new WordEmbeddingsExtractingEstimator.ColumnOptions(outputNames[outCol], inputNames[outCol.Input]); } bool customLookup = !string.IsNullOrWhiteSpace(_customLookupTable); diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs index 24871f763d..36ee0f83a9 100644 --- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs +++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs @@ -32,7 +32,7 @@ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Cate /// The transform catalog /// The column settings. public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, - params OneHotEncodingEstimator.ColumnInfo[] columns) + params OneHotEncodingEstimator.ColumnOptions[] columns) => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -43,7 +43,7 @@ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Cate /// Specifies an ordering for the encoding. If specified, this should be a single column data view, /// and the key-values will be taken from that column. If unspecified, the ordering will be determined from the input data upon fitting. 
public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, - OneHotEncodingEstimator.ColumnInfo[] columns, + OneHotEncodingEstimator.ColumnOptions[] columns, IDataView keyData = null) => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData); @@ -73,7 +73,7 @@ public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCata /// The transform catalog /// The column settings. public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog, - params OneHotHashEncodingEstimator.ColumnInfo[] columns) + params OneHotHashEncodingEstimator.ColumnOptions[] columns) => new OneHotHashEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/ConversionsCatalog.cs b/src/Microsoft.ML.Transforms/ConversionsCatalog.cs index d00078785c..0dc119a3f5 100644 --- a/src/Microsoft.ML.Transforms/ConversionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ConversionsCatalog.cs @@ -19,8 +19,8 @@ public static class ConversionsCatalog /// The categorical transform's catalog. /// The input column. public static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog, - params SimpleColumnInfo[] columns) - => new KeyToBinaryVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), SimpleColumnInfo.ConvertToValueTuples(columns)); + params ColumnOptions[] columns) + => new KeyToBinaryVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// /// Convert the key types back to binary vector. diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs index 56308d937d..a1f3f4c2ee 100644 --- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs @@ -26,7 +26,7 @@ public sealed class CountFeatureSelectingEstimator : IEstimator internal const string UserName = "Count Feature Selection Transform"; private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; [BestFriend] internal static class Defaults @@ -48,7 +48,7 @@ internal sealed class Options : TransformInputBase /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; @@ -63,7 +63,7 @@ public sealed class ColumnInfo /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. /// If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved. 
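A sketch of how the OneHotEncoding overloads above would be called after the rename; mlContext and the column names are assumptions, and the optional output-kind and sort arguments are assumed to keep their defaults:

    // Only the option type's name changes; the call shape matches the former ColumnInfo overload.
    var oneHot = mlContext.Transforms.Categorical.OneHotEncoding(
        new OneHotEncodingEstimator.ColumnOptions("EducationOneHot", "Education"));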
- public ColumnInfo(string name, string inputColumnName = null, long minCount = Defaults.Count) + public ColumnOptions(string name, string inputColumnName = null, long minCount = Defaults.Count) { Name = name; Contracts.CheckValue(Name, nameof(Name)); @@ -84,7 +84,7 @@ public ColumnInfo(string name, string inputColumnName = null, long minCount = De /// ]]> /// /// - internal CountFeatureSelectingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal CountFeatureSelectingEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(RegistrationName); @@ -106,7 +106,7 @@ internal CountFeatureSelectingEstimator(IHostEnvironment env, params ColumnInfo[ /// /// internal CountFeatureSelectingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, long minCount = Defaults.Count) - : this(env, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, minCount)) + : this(env, new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName, minCount)) { } @@ -149,7 +149,7 @@ public ITransformer Fit(IDataView input) using (var ch = _host.Start("Dropping Slots")) { // If no slots should be dropped from a column, use copy column to generate the corresponding output column. - SlotsDroppingTransformer.ColumnInfo[] dropSlotsColumns; + SlotsDroppingTransformer.ColumnOptions[] dropSlotsColumns; (string outputColumnName, string inputColumnName)[] copyColumnsPairs; CreateDropAndCopyColumns(_columns, size, scores, out int[] selectedCount, out dropSlotsColumns, out copyColumnsPairs); @@ -183,21 +183,21 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa host.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns)); host.CheckUserArg(options.Count > 0, nameof(options.Count)); - var columnInfos = options.Columns.Select(inColName => new ColumnInfo(inColName, minCount: options.Count)).ToArray(); + var columnOptionss = options.Columns.Select(inColName => new ColumnOptions(inColName, minCount: options.Count)).ToArray(); - return new CountFeatureSelectingEstimator(env, columnInfos).Fit(input).Transform(input) as IDataTransform; + return new CountFeatureSelectingEstimator(env, columnOptionss).Fit(input).Transform(input) as IDataTransform; } - private static void CreateDropAndCopyColumns(ColumnInfo[] columnInfos, int size, long[][] scores, - out int[] selectedCount, out SlotsDroppingTransformer.ColumnInfo[] dropSlotsColumns, out (string outputColumnName, string inputColumnName)[] copyColumnsPairs) + private static void CreateDropAndCopyColumns(ColumnOptions[] columnOptionss, int size, long[][] scores, + out int[] selectedCount, out SlotsDroppingTransformer.ColumnOptions[] dropSlotsColumns, out (string outputColumnName, string inputColumnName)[] copyColumnsPairs) { Contracts.Assert(size > 0); Contracts.Assert(Utils.Size(scores) == size); - Contracts.AssertValue(columnInfos); - Contracts.Assert(Utils.Size(columnInfos) == size); + Contracts.AssertValue(columnOptionss); + Contracts.Assert(Utils.Size(columnOptionss) == size); selectedCount = new int[scores.Length]; - var dropSlotsCols = new List(); + var dropSlotsCols = new List(); var copyCols = new List<(string outputColumnName, string inputColumnName)>(); for (int i = 0; i < size; i++) { @@ -206,11 +206,11 @@ private static void CreateDropAndCopyColumns(ColumnInfo[] columnInfos, int size, selectedCount[i] = 0; for (int j = 0; j < score.Length; j++) { - if (score[j] < columnInfos[i].MinCount) + 
if (score[j] < columnOptionss[i].MinCount) { // Adjacent slots are combined into a single range. int min = j; - while (j < score.Length && score[j] < columnInfos[i].MinCount) + while (j < score.Length && score[j] < columnOptionss[i].MinCount) j++; int max = j - 1; slots.Add((min, max)); @@ -221,9 +221,9 @@ private static void CreateDropAndCopyColumns(ColumnInfo[] columnInfos, int size, selectedCount[i]++; } if (slots.Count <= 0) - copyCols.Add((columnInfos[i].Name, columnInfos[i].InputColumnName)); + copyCols.Add((columnOptionss[i].Name, columnOptionss[i].InputColumnName)); else - dropSlotsCols.Add(new SlotsDroppingTransformer.ColumnInfo(columnInfos[i].Name, columnInfos[i].InputColumnName, slots.ToArray())); + dropSlotsCols.Add(new SlotsDroppingTransformer.ColumnOptions(columnOptionss[i].Name, columnOptionss[i].InputColumnName, slots.ToArray())); } dropSlotsColumns = dropSlotsCols.ToArray(); copyColumnsPairs = copyCols.ToArray(); diff --git a/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs b/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs index 697b39601e..7959a59c92 100644 --- a/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs +++ b/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs @@ -120,7 +120,7 @@ public static CommonOutputs.TransformOutput LightLda(IHostEnvironment env, Laten env.CheckValue(input, nameof(input)); var h = EntryPointUtils.CheckArgsAndCreateHost(env, "LightLda", input); - var cols = input.Columns.Select(colPair => new LatentDirichletAllocationEstimator.ColumnInfo(colPair, input)).ToArray(); + var cols = input.Columns.Select(colPair => new LatentDirichletAllocationEstimator.ColumnOptions(colPair, input)).ToArray(); var est = new LatentDirichletAllocationEstimator(h, cols); var view = est.Fit(input.Data).Transform(input.Data); diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs index fea8bc2e18..e57dab218e 100644 --- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs @@ -16,8 +16,8 @@ public static class ExtensionsCatalog /// The transform extensions' catalog. /// The names of the input columns of the transformation and the corresponding names for the output columns. public static MissingValueIndicatorEstimator IndicateMissingValues(this TransformsCatalog catalog, - params SimpleColumnInfo[] columns) - => new MissingValueIndicatorEstimator(CatalogUtils.GetEnvironment(catalog), SimpleColumnInfo.ConvertToValueTuples(columns)); + params ColumnOptions[] columns) + => new MissingValueIndicatorEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// /// Creates a new output column, or replaces the source with a new column @@ -39,26 +39,26 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor /// (depending on whether the is given a value, or left to null) /// identical to the input column for everything but the missing values. The missing values of the input column, in this new column are replaced with /// one of the values specifid in the . The default for the is - /// . + /// . /// /// The transform extensions' catalog. /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. /// If not provided, the will be replaced with the results of the transforms. 
- /// The type of replacement to use as specified in + /// The type of replacement to use as specified in public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, - MissingValueReplacingEstimator.ColumnInfo.ReplacementMode replacementKind = MissingValueReplacingEstimator.Defaults.ReplacementMode) + MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementKind = MissingValueReplacingEstimator.Defaults.ReplacementMode) => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, replacementKind); /// /// Creates a new output column, identical to the input column for everything but the missing values. - /// The missing values of the input column, in this new column are replaced with . + /// The missing values of the input column, in this new column are replaced with . /// /// The transform extensions' catalog. /// The name of the columns to use, and per-column transformation configuraiton. - public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingEstimator.ColumnInfo[] columns) + public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingEstimator.ColumnOptions[] columns) => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs index 8520cfe296..5217043a50 100644 --- a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs +++ b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs @@ -29,9 +29,9 @@ public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMu string labelColumn = MutualInfoSelectDefaults.LabelColumn, int slotsInOutput = MutualInfoSelectDefaults.SlotsInOutput, int numBins = MutualInfoSelectDefaults.NumBins, - params SimpleColumnInfo[] columns) + params ColumnOptions[] columns) => new MutualInformationFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), labelColumn, slotsInOutput, numBins, - SimpleColumnInfo.ConvertToValueTuples(columns)); + ColumnOptions.ConvertToValueTuples(columns)); /// /// The transform's catalog. 
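A sketch of the ReplaceMissingValues overload above after the rename; mlContext, the column names, and the choice of ReplacementMode.Mean are assumptions:

    // ReplacementMode now lives under ColumnOptions instead of ColumnInfo.
    var impute = mlContext.Transforms.ReplaceMissingValues(
        "FeaturesImputed", "Features",
        MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean);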
@@ -65,7 +65,7 @@ public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMu /// /// public static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog, - params CountFeatureSelectingEstimator.ColumnInfo[] columns) + params CountFeatureSelectingEstimator.ColumnOptions[] columns) => new CountFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 354fed5e91..98923d2cda 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -146,9 +146,9 @@ internal bool TryUnparse(StringBuilder sb) } } - private sealed class ColumnInfoLoaded : LpNormalizingEstimatorBase.ColumnInfoBase + private sealed class ColumnOptionsLoaded : LpNormalizingEstimatorBase.ColumnOptionsBase { - internal ColumnInfoLoaded(ModelLoadContext ctx, string name, string inputColumnName, bool normKindSerialized) + internal ColumnOptionsLoaded(ModelLoadContext ctx, string name, string inputColumnName, bool normKindSerialized) : base(ctx, name, inputColumnName, normKindSerialized) { @@ -193,10 +193,10 @@ private static VersionInfo GetVersionInfo() /// /// The objects describing how the transformation is applied on the input data. /// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); - private readonly LpNormalizingEstimatorBase.ColumnInfoBase[] _columns; + public IReadOnlyCollection Columns => _columns.AsReadOnly(); + private readonly LpNormalizingEstimatorBase.ColumnOptionsBase[] _columns; - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(LpNormalizingEstimatorBase.ColumnInfoBase[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(LpNormalizingEstimatorBase.ColumnOptionsBase[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -211,7 +211,7 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int /// /// Create a that takes multiple pairs of columns. /// - internal LpNormalizingTransformer(IHostEnvironment env, params LpNormalizingEstimatorBase.ColumnInfoBase[] columns) : + internal LpNormalizingTransformer(IHostEnvironment env, params LpNormalizingEstimatorBase.ColumnOptionsBase[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LpNormalizingTransformer)), GetColumnPairs(columns)) { _columns = columns.ToArray(); @@ -225,13 +225,13 @@ internal static IDataTransform Create(IHostEnvironment env, GcnOptions options, env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new GlobalContrastNormalizingEstimator.GcnColumnInfo[options.Columns.Length]; + var cols = new GlobalContrastNormalizingEstimator.GcnColumnOptions[options.Columns.Length]; using (var ch = env.Start("ValidateArgs")) { for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new GlobalContrastNormalizingEstimator.GcnColumnInfo( + cols[i] = new GlobalContrastNormalizingEstimator.GcnColumnOptions( item.Name, item.Source ?? item.Name, item.SubMean ?? 
options.SubMean, @@ -252,13 +252,13 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new LpNormalizingEstimator.LpNormColumnInfo[options.Columns.Length]; + var cols = new LpNormalizingEstimator.LpNormColumnOptions[options.Columns.Length]; using (var ch = env.Start("ValidateArgs")) { for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new LpNormalizingEstimator.LpNormColumnInfo( + cols[i] = new LpNormalizingEstimator.LpNormColumnOptions( item.Name, item.Source ?? item.Name, item.SubMean ?? options.SubMean, @@ -300,9 +300,9 @@ private LpNormalizingTransformer(IHost host, ModelLoadContext ctx) // // var columnsLength = ColumnPairs.Length; - _columns = new ColumnInfoLoaded[columnsLength]; + _columns = new ColumnOptionsLoaded[columnsLength]; for (int i = 0; i < columnsLength; i++) - _columns[i] = new ColumnInfoLoaded(ctx, ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, ctx.Header.ModelVerWritten >= VerVectorNormalizerSupported); + _columns[i] = new ColumnOptionsLoaded(ctx, ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, ctx.Header.ModelVerWritten >= VerVectorNormalizerSupported); } private protected override void SaveModel(ModelSaveContext ctx) @@ -660,7 +660,7 @@ public enum NormalizerKind : byte /// /// Describes base class for one column pair. /// - public abstract class ColumnInfoBase + public abstract class ColumnOptionsBase { /// /// Name of the column resulting from the transformation of . @@ -683,7 +683,7 @@ public abstract class ColumnInfoBase /// public readonly float Scale; - internal ColumnInfoBase(string name, string inputColumnName, bool substractMean, NormalizerKind normalizerKind, float scale) + internal ColumnOptionsBase(string name, string inputColumnName, bool substractMean, NormalizerKind normalizerKind, float scale) { Contracts.CheckNonWhiteSpace(name, nameof(name)); Contracts.CheckNonWhiteSpace(inputColumnName, nameof(inputColumnName)); @@ -695,7 +695,7 @@ internal ColumnInfoBase(string name, string inputColumnName, bool substractMean, NormKind = normalizerKind; } - internal ColumnInfoBase(ModelLoadContext ctx, string name, string inputColumnName, bool normKindSerialized) + internal ColumnOptionsBase(ModelLoadContext ctx, string name, string inputColumnName, bool normKindSerialized) { Contracts.AssertValue(ctx); Contracts.CheckNonWhiteSpace(inputColumnName, nameof(inputColumnName)); @@ -748,7 +748,7 @@ internal static class Defaults /// /// Create a that takes multiple pairs of columns. /// - internal LpNormalizingEstimatorBase(IHostEnvironment env, params ColumnInfoBase[] columns) + internal LpNormalizingEstimatorBase(IHostEnvironment env, params ColumnOptionsBase[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LpNormalizingEstimator)), new LpNormalizingTransformer(env, columns)) { } @@ -801,7 +801,7 @@ public sealed class LpNormalizingEstimator : LpNormalizingEstimatorBase /// /// Describes how the transformer handles one column pair. /// - public sealed class LpNormColumnInfo : ColumnInfoBase + public sealed class LpNormColumnOptions : ColumnOptionsBase { /// /// Describes how the transformer handles one column pair. @@ -810,7 +810,7 @@ public sealed class LpNormColumnInfo : ColumnInfoBase /// Name of column to transform. If set to , the value of the will be used as source. /// Subtract mean from each value before normalizing. 
/// The norm to use to normalize each sample. - public LpNormColumnInfo(string name, string inputColumnName = null, + public LpNormColumnOptions(string name, string inputColumnName = null, bool substractMean = Defaults.LpSubstractMean, NormalizerKind normalizerKind = Defaults.NormKind) : base(name, inputColumnName ?? name, substractMean, normalizerKind, 1) @@ -836,14 +836,14 @@ internal LpNormalizingEstimator(IHostEnvironment env, string outputColumnName, s /// Subtract mean from each value before normalizing. internal LpNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, NormalizerKind normKind = Defaults.NormKind, bool substractMean = Defaults.LpSubstractMean) - : this(env, columns.Select(x => new LpNormColumnInfo(x.outputColumnName, x.inputColumnName, substractMean, normKind)).ToArray()) + : this(env, columns.Select(x => new LpNormColumnOptions(x.outputColumnName, x.inputColumnName, substractMean, normKind)).ToArray()) { } /// /// Create a that takes multiple pairs of columns. /// - internal LpNormalizingEstimator(IHostEnvironment env, params LpNormColumnInfo[] columns) + internal LpNormalizingEstimator(IHostEnvironment env, params LpNormColumnOptions[] columns) : base(env, columns) { } @@ -857,7 +857,7 @@ public sealed class GlobalContrastNormalizingEstimator : LpNormalizingEstimatorB /// /// Describes how the transformer handles one Gcn column pair. /// - public sealed class GcnColumnInfo : ColumnInfoBase + public sealed class GcnColumnOptions : ColumnOptionsBase { /// /// Describes how the transformer handles one Gcn column pair. @@ -867,7 +867,7 @@ public sealed class GcnColumnInfo : ColumnInfoBase /// Subtract mean from each value before normalizing. /// Normalize by standard deviation rather than L2 norm. /// Scale features by this value. - public GcnColumnInfo(string name, string inputColumnName = null, + public GcnColumnOptions(string name, string inputColumnName = null, bool substractMean = Defaults.GcnSubstractMean, bool useStdDev = Defaults.UseStdDev, float scale = Defaults.Scale) @@ -897,14 +897,14 @@ internal GlobalContrastNormalizingEstimator(IHostEnvironment env, string outputC /// Scale features by this value. internal GlobalContrastNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, bool substractMean = Defaults.GcnSubstractMean, bool useStdDev = Defaults.UseStdDev, float scale = Defaults.Scale) - : this(env, columns.Select(x => new GcnColumnInfo(x.outputColumnName, x.inputColumnName, substractMean, useStdDev, scale)).ToArray()) + : this(env, columns.Select(x => new GcnColumnOptions(x.outputColumnName, x.inputColumnName, substractMean, useStdDev, scale)).ToArray()) { } /// /// Create a that takes multiple pairs of columns. /// - internal GlobalContrastNormalizingEstimator(IHostEnvironment env, params GcnColumnInfo[] columns) : + internal GlobalContrastNormalizingEstimator(IHostEnvironment env, params GcnColumnOptions[] columns) : base(env, columns) { } diff --git a/src/Microsoft.ML.Transforms/HashJoiningTransform.cs b/src/Microsoft.ML.Transforms/HashJoiningTransform.cs index 87d70aa242..a59046e714 100644 --- a/src/Microsoft.ML.Transforms/HashJoiningTransform.cs +++ b/src/Microsoft.ML.Transforms/HashJoiningTransform.cs @@ -106,7 +106,7 @@ internal bool TryUnparse(StringBuilder sb) } } - public sealed class ColumnInfoEx + public sealed class ColumnOptionsEx { // Either VBuffer> or a single Key. 
// Note that if CustomSlotMap contains only one array, the output type of the transform will a single Key. @@ -124,7 +124,7 @@ public int OutputValueCount get { return OutputColumnType.GetValueCount(); } } - public ColumnInfoEx(int[][] slotMap, int hashBits, uint hashSeed, bool ordered) + public ColumnOptionsEx(int[][] slotMap, int hashBits, uint hashSeed, bool ordered) { Contracts.CheckValueOrNull(slotMap); Contracts.Check(NumBitsMin <= hashBits && hashBits < NumBitsLim); @@ -173,7 +173,7 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(HashJoiningTransform).Assembly.FullName); } - private readonly ColumnInfoEx[] _exes; + private readonly ColumnOptionsEx[] _exes; /// /// Initializes a new instance of . @@ -204,12 +204,12 @@ public HashJoiningTransform(IHostEnvironment env, Arguments args, IDataView inpu if (args.HashBits < NumBitsMin || args.HashBits >= NumBitsLim) throw Host.ExceptUserArg(nameof(args.HashBits), "hashBits should be between {0} and {1} inclusive", NumBitsMin, NumBitsLim - 1); - _exes = new ColumnInfoEx[Infos.Length]; + _exes = new ColumnOptionsEx[Infos.Length]; for (int i = 0; i < Infos.Length; i++) { var hashBits = args.Columns[i].HashBits ?? args.HashBits; Host.CheckUserArg(NumBitsMin <= hashBits && hashBits < NumBitsLim, nameof(args.HashBits)); - _exes[i] = CreateColumnInfoEx( + _exes[i] = CreateColumnOptionsEx( args.Columns[i].Join ?? args.Join, args.Columns[i].CustomSlotMap, args.Columns[i].HashBits ?? args.HashBits, @@ -238,7 +238,7 @@ private HashJoiningTransform(IHost host, ModelLoadContext ctx, IDataView input) Host.AssertNonEmpty(Infos); - _exes = new ColumnInfoEx[Infos.Length]; + _exes = new ColumnOptionsEx[Infos.Length]; for (int i = 0; i < Infos.Length; i++) { int hashBits = ctx.Reader.ReadInt32(); @@ -268,7 +268,7 @@ private HashJoiningTransform(IHost host, ModelLoadContext ctx, IDataView input) } } - _exes[i] = new ColumnInfoEx(slotMap, hashBits, hashSeed, ordered); + _exes[i] = new ColumnOptionsEx(slotMap, hashBits, hashSeed, ordered); } SetMetadata(); @@ -327,7 +327,7 @@ private protected override void SaveModel(ModelSaveContext ctx) } } - private ColumnInfoEx CreateColumnInfoEx(bool join, string customSlotMap, int hashBits, uint hashSeed, bool ordered, ColInfo colInfo) + private ColumnOptionsEx CreateColumnOptionsEx(bool join, string customSlotMap, int hashBits, uint hashSeed, bool ordered, ColInfo colInfo) { int[][] slotMap = null; if (colInfo.TypeSrc is VectorType vectorType) @@ -340,7 +340,7 @@ private ColumnInfoEx CreateColumnInfoEx(bool join, string customSlotMap, int has Host.Assert(Utils.Size(slotMap) >= 1); } - return new ColumnInfoEx(slotMap, hashBits, hashSeed, ordered); + return new ColumnOptionsEx(slotMap, hashBits, hashSeed, ordered); } private int[][] CompileSlotMap(string slotMapString, int srcSlotCount) diff --git a/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs b/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs index 5ccb1d5103..96f52e3252 100644 --- a/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs @@ -113,7 +113,7 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat } } - private static SlotsDroppingTransformer.ColumnInfo CreateDropSlotsColumn(Options options, in VBuffer scores, out int selectedCount) + private static SlotsDroppingTransformer.ColumnOptions CreateDropSlotsColumn(Options options, in VBuffer scores, out int selectedCount) { // Not checking the scores.Length, because: // 1. 
If it's the same as the features column length, we should be constructing the right DropSlots arguments. @@ -127,7 +127,7 @@ private static SlotsDroppingTransformer.ColumnInfo CreateDropSlotsColumn(Options // Degenerate case, dropping all slots. if (scoresValues.Length == 0) - return new SlotsDroppingTransformer.ColumnInfo(options.FeatureColumn); + return new SlotsDroppingTransformer.ColumnOptions(options.FeatureColumn); int tiedScoresToKeep; float threshold; @@ -224,7 +224,7 @@ private static SlotsDroppingTransformer.ColumnInfo CreateDropSlotsColumn(Options } if (slots.Count > 0) - return new SlotsDroppingTransformer.ColumnInfo(options.FeatureColumn, slots: slots.ToArray()); + return new SlotsDroppingTransformer.ColumnOptions(options.FeatureColumn, slots: slots.ToArray()); return null; } diff --git a/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs index 7016cb40f1..a053f0c0f7 100644 --- a/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs @@ -140,9 +140,9 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa h.CheckValue(input, nameof(input)); h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns)); - var replaceCols = new List(); + var replaceCols = new List(); var naIndicatorCols = new List(); - var naConvCols = new List(); + var naConvCols = new List(); var concatCols = new List(); var dropCols = new List(); var tmpIsMissingColNames = input.Schema.GetTempColumnNames(options.Columns.Length, "IsMissing"); @@ -154,8 +154,8 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa var addInd = column.ConcatIndicator ?? options.Concat; if (!addInd) { - replaceCols.Add(new MissingValueReplacingEstimator.ColumnInfo(column.Name, column.Source, - (MissingValueReplacingEstimator.ColumnInfo.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); + replaceCols.Add(new MissingValueReplacingEstimator.ColumnOptions(column.Name, column.Source, + (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); continue; } @@ -185,12 +185,12 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa { throw h.Except("Cannot get a DataKind for type '{0}'", replaceItemType.RawType); } - naConvCols.Add(new TypeConvertingEstimator.ColumnInfo(tmpIsMissingColName, replaceItemTypeKind.ToDataKind(), tmpIsMissingColName)); + naConvCols.Add(new TypeConvertingEstimator.ColumnOptions(tmpIsMissingColName, replaceItemTypeKind.ToDataKind(), tmpIsMissingColName)); } // Add the NAReplaceTransform column. - replaceCols.Add(new MissingValueReplacingEstimator.ColumnInfo(tmpReplacementColName, column.Source, - (MissingValueReplacingEstimator.ColumnInfo.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); + replaceCols.Add(new MissingValueReplacingEstimator.ColumnOptions(tmpReplacementColName, column.Source, + (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); // Add the ConcatTransform column. 
if (replaceType is VectorType) diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs index 8d7fed1954..c897ff5613 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs @@ -173,7 +173,7 @@ private static string TestType(DataViewType type) return null; } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(MissingValueReplacingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(MissingValueReplacingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -202,7 +202,7 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int throw Host.ExceptParam(nameof(inputSchema), reason); } - internal MissingValueReplacingTransformer(IHostEnvironment env, IDataView input, params MissingValueReplacingEstimator.ColumnInfo[] columns) + internal MissingValueReplacingTransformer(IHostEnvironment env, IDataView input, params MissingValueReplacingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(MissingValueReplacingTransformer)), GetColumnPairs(columns)) { // Check that all the input columns are present and correct. @@ -269,7 +269,7 @@ private T[] GetValuesArray(VBuffer src, VectorType srcType, int iinfo) /// Vectors default to by-slot imputation unless otherwise specified, except for unknown sized vectors /// which force across-slot imputation. /// - private void GetReplacementValues(IDataView input, MissingValueReplacingEstimator.ColumnInfo[] columns, out object[] repValues, out BitArray[] slotIsDefault, out DataViewType[] types) + private void GetReplacementValues(IDataView input, MissingValueReplacingEstimator.ColumnOptions[] columns, out object[] repValues, out BitArray[] slotIsDefault, out DataViewType[] types) { repValues = new object[columns.Length]; slotIsDefault = new BitArray[columns.Length]; @@ -432,7 +432,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new MissingValueReplacingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new MissingValueReplacingEstimator.ColumnOptions[options.Columns.Length]; for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; @@ -440,17 +440,17 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa if (!Enum.IsDefined(typeof(ReplacementKind), kind)) throw env.ExceptUserArg(nameof(options.ReplacementKind), "Undefined sorting criteria '{0}' detected for column '{1}'", kind, item.Name); - cols[i] = new MissingValueReplacingEstimator.ColumnInfo( + cols[i] = new MissingValueReplacingEstimator.ColumnOptions( item.Name, item.Source, - (MissingValueReplacingEstimator.ColumnInfo.ReplacementMode)(item.Kind ?? options.ReplacementKind), + (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(item.Kind ?? options.ReplacementKind), item.Slot ?? 
options.ImputeBySlot, item.ReplacementString); }; return new MissingValueReplacingTransformer(env, input, cols).MakeDataTransform(input); } - internal static IDataTransform Create(IHostEnvironment env, IDataView input, params MissingValueReplacingEstimator.ColumnInfo[] columns) + internal static IDataTransform Create(IHostEnvironment env, IDataView input, params MissingValueReplacingEstimator.ColumnOptions[] columns) { return new MissingValueReplacingTransformer(env, input, columns).MakeDataTransform(input); } @@ -895,14 +895,14 @@ public sealed class MissingValueReplacingEstimator : IEstimator /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// /// The possible ways to replace missing values. @@ -951,7 +951,7 @@ public enum ReplacementMode : byte /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. - public ColumnInfo(string name, string inputColumnName = null, ReplacementMode replacementMode = Defaults.ReplacementMode, + public ColumnOptions(string name, string inputColumnName = null, ReplacementMode replacementMode = Defaults.ReplacementMode, bool imputeBySlot = Defaults.ImputeBySlot) { Contracts.CheckNonWhiteSpace(name, nameof(name)); @@ -962,10 +962,10 @@ public ColumnInfo(string name, string inputColumnName = null, ReplacementMode re } /// - /// This constructor is used internally to convert from to + /// This constructor is used internally to convert from to /// as we support in command line and entrypoint API only. /// - internal ColumnInfo(string name, string inputColumnName, ReplacementMode replacementMode, bool imputeBySlot, string replacementString) + internal ColumnOptions(string name, string inputColumnName, ReplacementMode replacementMode, bool imputeBySlot, string replacementString) : this(name, inputColumnName, replacementMode, imputeBySlot) { ReplacementString = replacementString; @@ -973,16 +973,16 @@ internal ColumnInfo(string name, string inputColumnName, ReplacementMode replace } private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; - internal MissingValueReplacingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, ColumnInfo.ReplacementMode replacementKind = Defaults.ReplacementMode) - : this(env, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, replacementKind)) + internal MissingValueReplacingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, ColumnOptions.ReplacementMode replacementKind = Defaults.ReplacementMode) + : this(env, new ColumnOptions(outputColumnName, inputColumnName ?? 
outputColumnName, replacementKind)) { } [BestFriend] - internal MissingValueReplacingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal MissingValueReplacingEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(MissingValueReplacingEstimator)); diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 9a3e337cf1..4c0066e1b9 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -142,7 +142,7 @@ public ITransformer Fit(IDataView input) var threshold = ComputeThreshold(scores, _slotsInOutput, out int tiedScoresToKeep); // If no slots should be dropped in a column, use CopyColumn to generate the corresponding output column. - SlotsDroppingTransformer.ColumnInfo[] dropSlotsColumns; + SlotsDroppingTransformer.ColumnOptions[] dropSlotsColumns; (string outputColumnName, string inputColumnName)[] copyColumnPairs; CreateDropAndCopyColumns(colArr.Length, scores, threshold, tiedScoresToKeep, _columns.Where(col => colSet.Contains(col.inputColumnName)).ToArray(), out int[] selectedCount, out dropSlotsColumns, out copyColumnPairs); @@ -258,14 +258,14 @@ private static float ComputeThreshold(float[][] scores, int topk, out int tiedSc } private static void CreateDropAndCopyColumns(int size, float[][] scores, float threshold, int tiedScoresToKeep, (string outputColumnName, string inputColumnName)[] cols, - out int[] selectedCount, out SlotsDroppingTransformer.ColumnInfo[] dropSlotsColumns, out (string outputColumnName, string inputColumnName)[] copyColumnsPairs) + out int[] selectedCount, out SlotsDroppingTransformer.ColumnOptions[] dropSlotsColumns, out (string outputColumnName, string inputColumnName)[] copyColumnsPairs) { Contracts.Assert(size > 0); Contracts.Assert(Utils.Size(scores) == size); Contracts.Assert(Utils.Size(cols) == size); Contracts.Assert(threshold > 0 || (threshold == 0 && tiedScoresToKeep == 0)); - var dropCols = new List(); + var dropCols = new List(); var copyCols = new List<(string outputColumnName, string inputColumnName)>(); selectedCount = new int[scores.Length]; for (int i = 0; i < size; i++) @@ -311,7 +311,7 @@ private static void CreateDropAndCopyColumns(int size, float[][] scores, float t if (slots.Count <= 0) copyCols.Add(cols[i]); else - dropCols.Add(new SlotsDroppingTransformer.ColumnInfo(cols[i].outputColumnName, cols[i].inputColumnName, slots.ToArray())); + dropCols.Add(new SlotsDroppingTransformer.ColumnOptions(cols[i].outputColumnName, cols[i].inputColumnName, slots.ToArray())); } dropSlotsColumns = dropCols.ToArray(); copyColumnsPairs = copyCols.ToArray(); diff --git a/src/Microsoft.ML.Transforms/OneHotEncoding.cs b/src/Microsoft.ML.Transforms/OneHotEncoding.cs index 19559d5b33..13895abcff 100644 --- a/src/Microsoft.ML.Transforms/OneHotEncoding.cs +++ b/src/Microsoft.ML.Transforms/OneHotEncoding.cs @@ -126,10 +126,10 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa h.CheckValue(input, nameof(input)); h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns)); - var columns = new List(); + var columns = new List(); foreach (var column in options.Columns) { - var col = new OneHotEncodingEstimator.ColumnInfo( + var col = new OneHotEncodingEstimator.ColumnOptions( column.Name, column.Source ?? column.Name, column.OutputKind ?? 
options.OutputKind, @@ -184,7 +184,7 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public class ColumnInfo : ValueToKeyMappingEstimator.ColumnInfo + public class ColumnOptions : ValueToKeyMappingEstimator.ColumnOptions { public readonly OneHotEncodingTransformer.OutputKind OutputKind; /// @@ -197,7 +197,7 @@ public class ColumnInfo : ValueToKeyMappingEstimator.ColumnInfo /// How items should be ordered when vectorized. If choosen they will be in the order encountered. /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). /// List of terms. - public ColumnInfo(string name, string inputColumnName = null, + public ColumnOptions(string name, string inputColumnName = null, OneHotEncodingTransformer.OutputKind outputKind = Defaults.OutKind, int maxNumTerms = ValueToKeyMappingEstimator.Defaults.MaxNumKeys, ValueToKeyMappingEstimator.SortOrder sort = ValueToKeyMappingEstimator.Defaults.Sort, string[] term = null) @@ -224,11 +224,11 @@ internal void SetTerms(string terms) /// The type of output expected. internal OneHotEncodingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, OneHotEncodingTransformer.OutputKind outputKind = Defaults.OutKind) - : this(env, new[] { new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, outputKind) }) + : this(env, new[] { new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName, outputKind) }) { } - internal OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns, IDataView keyData = null) + internal OneHotEncodingEstimator(IHostEnvironment env, ColumnOptions[] columns, IDataView keyData = null) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(OneHotEncodingEstimator)); @@ -261,7 +261,7 @@ internal OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns, IDa if (binaryCols.Count > 0) toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryCols.Select(x => (x.outputColumnName, x.inputColumnName)).ToArray()); if (cols.Count > 0) - toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorMappingEstimator.ColumnInfo(x.outputColumnName, x.inputColumnName, x.bag)).ToArray()); + toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorMappingEstimator.ColumnOptions(x.outputColumnName, x.inputColumnName, x.bag)).ToArray()); if (toBinVector != null && toVector != null) _toSomething = toVector.Append(toBinVector); diff --git a/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs b/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs index 5f2118f388..3d3e1ec98d 100644 --- a/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs +++ b/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs @@ -151,10 +151,10 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa h.CheckValue(input, nameof(input)); h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns)); - var columns = new List(); + var columns = new List(); foreach (var column in options.Columns) { - var col = new OneHotHashEncodingEstimator.ColumnInfo( + var col = new OneHotHashEncodingEstimator.ColumnOptions( column.Name, column.Source ?? column.Name, column.OutputKind ?? options.OutputKind, @@ -219,9 +219,9 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. 
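                // Illustrative usage sketch only, not part of this change: constructing the renamed
                // OneHotHashEncodingEstimator.ColumnOptions via the public constructor shown below.
                // "Workclass" is a placeholder column name; the options object would normally be passed
                // to the corresponding categorical catalog extension rather than used on its own.
                var hashEncodedColumn = new OneHotHashEncodingEstimator.ColumnOptions(
                    "Workclass", "Workclass", hashBits: 16, invertHash: -1);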
/// - public sealed class ColumnInfo + public sealed class ColumnOptions { - public readonly HashingEstimator.ColumnInfo HashInfo; + public readonly HashingEstimator.ColumnOptions HashInfo; public readonly OneHotEncodingTransformer.OutputKind OutputKind; /// @@ -237,14 +237,14 @@ public sealed class ColumnInfo /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public ColumnInfo(string name, string inputColumnName = null, + public ColumnOptions(string name, string inputColumnName = null, OneHotEncodingTransformer.OutputKind outputKind = Defaults.OutputKind, int hashBits = Defaults.HashBits, uint seed = Defaults.Seed, bool ordered = Defaults.Ordered, int invertHash = Defaults.InvertHash) { - HashInfo = new HashingEstimator.ColumnInfo(name, inputColumnName ?? name, hashBits, seed, ordered, invertHash); + HashInfo = new HashingEstimator.ColumnOptions(name, inputColumnName ?? name, hashBits, seed, ordered, invertHash); OutputKind = outputKind; } } @@ -272,11 +272,11 @@ internal OneHotHashEncodingEstimator(IHostEnvironment env, int hashBits = Defaults.HashBits, int invertHash = Defaults.InvertHash, OneHotEncodingTransformer.OutputKind outputKind = Defaults.OutputKind) - : this(env, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, outputKind, hashBits, invertHash: invertHash)) + : this(env, new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName, outputKind, hashBits, invertHash: invertHash)) { } - internal OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(ValueToKeyMappingEstimator)); @@ -313,7 +313,7 @@ internal OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] c if (binaryCols.Count > 0) toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryCols.Select(x => (x.outputColumnName, x.inputColumnName)).ToArray()); if (cols.Count > 0) - toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorMappingEstimator.ColumnInfo(x.outputColumnName, x.inputColumnName, x.bag)).ToArray()); + toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorMappingEstimator.ColumnOptions(x.outputColumnName, x.inputColumnName, x.bag)).ToArray()); if (toBinVector != null && toVector != null) _toSomething = toVector.Append(toBinVector); diff --git a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs index 627160b5c5..b36d17295a 100644 --- a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs +++ b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs @@ -39,7 +39,7 @@ public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this /// /// The transform's catalog. /// The input columns to use for the transformation. 
- public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, params RandomFourierFeaturizingEstimator.ColumnInfo[] columns) + public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, params RandomFourierFeaturizingEstimator.ColumnOptions[] columns) => new RandomFourierFeaturizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -66,7 +66,7 @@ public static LpNormalizingEstimator LpNormalize(this TransformsCatalog.Projecti /// /// The transform's catalog. /// Describes the parameters of the lp-normalization process for each column pair. - public static LpNormalizingEstimator LpNormalize(this TransformsCatalog.ProjectionTransforms catalog, params LpNormalizingEstimator.LpNormColumnInfo[] columns) + public static LpNormalizingEstimator LpNormalize(this TransformsCatalog.ProjectionTransforms catalog, params LpNormalizingEstimator.LpNormColumnOptions[] columns) => new LpNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -96,7 +96,7 @@ public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this Tr /// /// The transform's catalog. /// Describes the parameters of the gcn-normaliztion process for each column pair. - public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this TransformsCatalog.ProjectionTransforms catalog, params GlobalContrastNormalizingEstimator.GcnColumnInfo[] columns) + public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this TransformsCatalog.ProjectionTransforms catalog, params GlobalContrastNormalizingEstimator.GcnColumnOptions[] columns) => new GlobalContrastNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs index 5890ef09da..bbd01d0977 100644 --- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs +++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs @@ -109,7 +109,7 @@ private sealed class TransformInfo private readonly TauswortheHybrid _rand; private readonly TauswortheHybrid.State _state; - public TransformInfo(IHost host, RandomFourierFeaturizingEstimator.ColumnInfo column, int d, float avgDist) + public TransformInfo(IHost host, RandomFourierFeaturizingEstimator.ColumnOptions column, int d, float avgDist) { Contracts.AssertValue(host); @@ -238,7 +238,7 @@ private static string TestColumnType(DataViewType type) return "Expected vector of floats with known size"; } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(RandomFourierFeaturizingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(RandomFourierFeaturizingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -255,7 +255,7 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int new VectorType(NumberDataViewType.Single, _transformInfos[col].SrcDim).ToString(), type.ToString()); } - internal RandomFourierFeaturizingTransformer(IHostEnvironment env, IDataView input, RandomFourierFeaturizingEstimator.ColumnInfo[] columns) + internal RandomFourierFeaturizingTransformer(IHostEnvironment env, IDataView input, RandomFourierFeaturizingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, 
nameof(env)).Register(nameof(RandomFourierFeaturizingTransformer)), GetColumnPairs(columns)) { var avgDistances = GetAvgDistances(columns, input); @@ -281,7 +281,7 @@ private static int RoundUp(int cflt, int cfltAlign) return cblob * cfltAlign; } - private float[] GetAvgDistances(RandomFourierFeaturizingEstimator.ColumnInfo[] columns, IDataView input) + private float[] GetAvgDistances(RandomFourierFeaturizingEstimator.ColumnOptions[] columns, IDataView input) { var avgDistances = new float[columns.Length]; const int reservoirSize = 5000; @@ -420,14 +420,14 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new RandomFourierFeaturizingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new RandomFourierFeaturizingEstimator.ColumnOptions[options.Columns.Length]; using (var ch = env.Start("ValidateArgs")) { for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new RandomFourierFeaturizingEstimator.ColumnInfo( + cols[i] = new RandomFourierFeaturizingEstimator.ColumnOptions( item.Name, item.NewDim ?? options.NewDim, item.UseSin ?? options.UseSin, @@ -618,7 +618,7 @@ internal static class Defaults /// /// Describes how the transformer handles one Gcn column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . @@ -654,7 +654,7 @@ public sealed class ColumnInfo /// Name of column to transform. /// Which fourier generator to use. /// The seed of the random number generator for generating the new features (if unspecified, the global random is used). - public ColumnInfo(string name, int newDim, bool useSin, string inputColumnName = null, KernelBase generator = null, int? seed = null) + public ColumnOptions(string name, int newDim, bool useSin, string inputColumnName = null, KernelBase generator = null, int? seed = null) { Contracts.CheckUserArg(newDim > 0, nameof(newDim), "must be positive."); InputColumnName = inputColumnName ?? name; @@ -667,7 +667,7 @@ public ColumnInfo(string name, int newDim, bool useSin, string inputColumnName = } private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; /// /// Convinence constructor for simple one column case. @@ -678,11 +678,11 @@ public ColumnInfo(string name, int newDim, bool useSin, string inputColumnName = /// The number of random Fourier features to create. /// Create two features for every random Fourier frequency? (one for cos and one for sin). internal RandomFourierFeaturizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int newDim = Defaults.NewDim, bool useSin = Defaults.UseSin) - : this(env, new ColumnInfo(outputColumnName, newDim, useSin, inputColumnName ?? outputColumnName)) + : this(env, new ColumnOptions(outputColumnName, newDim, useSin, inputColumnName ?? 
outputColumnName)) { } - internal RandomFourierFeaturizingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal RandomFourierFeaturizingEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(RandomFourierFeaturizingEstimator)); diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 8e087eb9fc..49dd3c61ad 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -195,7 +195,7 @@ internal LdaSummary GetLdaDetails(int iinfo) private sealed class LdaState : IDisposable { - internal readonly LatentDirichletAllocationEstimator.ColumnInfo InfoEx; + internal readonly LatentDirichletAllocationEstimator.ColumnOptions InfoEx; private readonly int _numVocab; private readonly object _preparationSyncRoot; private readonly object _testSyncRoot; @@ -208,7 +208,7 @@ private LdaState() _testSyncRoot = new object(); } - internal LdaState(IExceptionContext ectx, LatentDirichletAllocationEstimator.ColumnInfo ex, int numVocab) + internal LdaState(IExceptionContext ectx, LatentDirichletAllocationEstimator.ColumnOptions ex, int numVocab) : this() { Contracts.AssertValue(ectx); @@ -245,7 +245,7 @@ internal LdaState(IExceptionContext ectx, ModelLoadContext ctx) // (serializing term by term, for one term) // int: term_id, int: topic_num, KeyValuePair[]: termTopicVector - InfoEx = new LatentDirichletAllocationEstimator.ColumnInfo(ectx, ctx); + InfoEx = new LatentDirichletAllocationEstimator.ColumnOptions(ectx, ctx); _numVocab = ctx.Reader.ReadInt32(); ectx.CheckDecode(_numVocab > 0); @@ -601,7 +601,7 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(LatentDirichletAllocationTransformer).Assembly.FullName); } - private readonly LatentDirichletAllocationEstimator.ColumnInfo[] _columns; + private readonly LatentDirichletAllocationEstimator.ColumnOptions[] _columns; private readonly LdaState[] _ldas; private readonly List>> _columnMappings; @@ -611,7 +611,7 @@ private static VersionInfo GetVersionInfo() internal const string UserName = "Latent Dirichlet Allocation Transform"; internal const string ShortName = "LightLda"; - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(LatentDirichletAllocationEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(LatentDirichletAllocationEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -627,7 +627,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair private LatentDirichletAllocationTransformer(IHostEnvironment env, LdaState[] ldas, List>> columnMappings, - params LatentDirichletAllocationEstimator.ColumnInfo[] columns) + params LatentDirichletAllocationEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LatentDirichletAllocationTransformer)), GetColumnPairs(columns)) { Host.AssertNonEmpty(ColumnPairs); @@ -647,7 +647,7 @@ private LatentDirichletAllocationTransformer(IHost host, ModelLoadContext ctx) : // Note: columnsLength would be just one in most cases. 
var columnsLength = ColumnPairs.Length; - _columns = new LatentDirichletAllocationEstimator.ColumnInfo[columnsLength]; + _columns = new LatentDirichletAllocationEstimator.ColumnOptions[columnsLength]; _ldas = new LdaState[columnsLength]; for (int i = 0; i < _ldas.Length; i++) { @@ -656,7 +656,7 @@ private LatentDirichletAllocationTransformer(IHost host, ModelLoadContext ctx) : } } - internal static LatentDirichletAllocationTransformer TrainLdaTransformer(IHostEnvironment env, IDataView inputData, params LatentDirichletAllocationEstimator.ColumnInfo[] columns) + internal static LatentDirichletAllocationTransformer TrainLdaTransformer(IHostEnvironment env, IDataView inputData, params LatentDirichletAllocationEstimator.ColumnOptions[] columns) { var ldas = new LdaState[columns.Length]; @@ -706,7 +706,7 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = options.Columns.Select(colPair => new LatentDirichletAllocationEstimator.ColumnInfo(colPair, options)).ToArray(); + var cols = options.Columns.Select(colPair => new LatentDirichletAllocationEstimator.ColumnOptions(colPair, options)).ToArray(); return TrainLdaTransformer(env, input, cols).MakeDataTransform(input); } @@ -759,7 +759,7 @@ private static int GetFrequency(double value) return result; } - private static List>> Train(IHostEnvironment env, IChannel ch, IDataView inputData, LdaState[] states, params LatentDirichletAllocationEstimator.ColumnInfo[] columns) + private static List>> Train(IHostEnvironment env, IChannel ch, IDataView inputData, LdaState[] states, params LatentDirichletAllocationEstimator.ColumnOptions[] columns) { env.AssertValue(ch); ch.AssertValue(inputData); @@ -934,7 +934,7 @@ internal static class Defaults } private readonly IHost _host; - private readonly ImmutableArray _columns; + private readonly ImmutableArray _columns; /// /// The environment. @@ -964,7 +964,7 @@ internal LatentDirichletAllocationEstimator(IHostEnvironment env, int numSummaryTermPerTopic = Defaults.NumSummaryTermPerTopic, int numBurninIterations = Defaults.NumBurninIterations, bool resetRandomGenerator = Defaults.ResetRandomGenerator) - : this(env, new[] { new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, + : this(env, new[] { new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName, numTopic, alphaSum, beta, mhstep, numIterations, likelihoodInterval, numThreads, numMaxDocToken, numSummaryTermPerTopic, numBurninIterations, resetRandomGenerator) }) { } @@ -972,7 +972,7 @@ internal LatentDirichletAllocationEstimator(IHostEnvironment env, /// /// The environment. /// Describes the parameters of the LDA process for each column pair. - internal LatentDirichletAllocationEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal LatentDirichletAllocationEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(LatentDirichletAllocationEstimator)); @@ -982,7 +982,7 @@ internal LatentDirichletAllocationEstimator(IHostEnvironment env, params ColumnI /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . @@ -1053,7 +1053,7 @@ public sealed class ColumnInfo /// The number of words to summarize the topic. /// The number of burn-in iterations. 
/// Reset the random number generator for each document. - public ColumnInfo(string name, + public ColumnOptions(string name, string inputColumnName = null, int numTopic = LatentDirichletAllocationEstimator.Defaults.NumTopic, float alphaSum = LatentDirichletAllocationEstimator.Defaults.AlphaSum, @@ -1093,7 +1093,7 @@ public ColumnInfo(string name, ResetRandomGenerator = resetRandomGenerator; } - internal ColumnInfo(LatentDirichletAllocationTransformer.Column item, LatentDirichletAllocationTransformer.Options options) : + internal ColumnOptions(LatentDirichletAllocationTransformer.Column item, LatentDirichletAllocationTransformer.Options options) : this(item.Name, item.Source ?? item.Name, item.NumTopic ?? options.NumTopic, @@ -1110,7 +1110,7 @@ internal ColumnInfo(LatentDirichletAllocationTransformer.Column item, LatentDiri { } - internal ColumnInfo(IExceptionContext ectx, ModelLoadContext ctx) + internal ColumnOptions(IExceptionContext ectx, ModelLoadContext ctx) { Contracts.AssertValue(ectx); ectx.AssertValue(ctx); diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index 0d496bcd84..112b865389 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -172,7 +172,7 @@ private static VersionInfo GetVersionInfo() private const int VersionTransformer = 0x00010003; - private readonly ImmutableArray _columns; + private readonly ImmutableArray _columns; private readonly VBuffer>[] _slotNames; private readonly VectorType[] _slotNamesTypes; @@ -181,7 +181,7 @@ private static VersionInfo GetVersionInfo() /// /// Host Environment. /// Description of dataset columns and how to process them. - internal NgramHashingTransformer(IHostEnvironment env, params NgramHashingEstimator.ColumnInfo[] columns) : + internal NgramHashingTransformer(IHostEnvironment env, params NgramHashingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(NgramHashingTransformer))) { _columns = columns.ToImmutableArray(); @@ -192,7 +192,7 @@ internal NgramHashingTransformer(IHostEnvironment env, params NgramHashingEstima } } - internal NgramHashingTransformer(IHostEnvironment env, IDataView input, params NgramHashingEstimator.ColumnInfo[] columns) : + internal NgramHashingTransformer(IHostEnvironment env, IDataView input, params NgramHashingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(NgramHashingTransformer))) { Contracts.CheckValue(columns, nameof(columns)); @@ -289,14 +289,14 @@ private NgramHashingTransformer(IHostEnvironment env, ModelLoadContext ctx, bool } var columnsLength = ctx.Reader.ReadInt32(); Contracts.CheckDecode(columnsLength > 0); - var columns = new NgramHashingEstimator.ColumnInfo[columnsLength]; + var columns = new NgramHashingEstimator.ColumnOptions[columnsLength]; if (!loadLegacy) { // *** Binary format *** // int number of columns // columns for (int i = 0; i < columnsLength; i++) - columns[i] = new NgramHashingEstimator.ColumnInfo(ctx); + columns[i] = new NgramHashingEstimator.ColumnOptions(ctx); } else { @@ -326,7 +326,7 @@ private NgramHashingTransformer(IHostEnvironment env, ModelLoadContext ctx, bool // int number of columns // columns for (int i = 0; i < columnsLength; i++) - columns[i] = new NgramHashingEstimator.ColumnInfo(ctx, outputs[i], inputs[i]); + columns[i] = new NgramHashingEstimator.ColumnOptions(ctx, outputs[i], inputs[i]); } _columns 
= columns.ToImmutableArray(); TextModelHelper.LoadAll(Host, ctx, columnsLength, out _slotNames, out _slotNamesTypes); @@ -340,14 +340,14 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat env.CheckValue(input, nameof(input)); env.CheckValue(options.Column, nameof(options.Column)); - var cols = new NgramHashingEstimator.ColumnInfo[options.Column.Length]; + var cols = new NgramHashingEstimator.ColumnOptions[options.Column.Length]; using (var ch = env.Start("ValidateArgs")) { for (int i = 0; i < cols.Length; i++) { var item = options.Column[i]; - cols[i] = new NgramHashingEstimator.ColumnInfo( + cols[i] = new NgramHashingEstimator.ColumnOptions( item.Name, item.Source ?? new string[] { item.Name }, item.NgramLength ?? options.NgramLength, @@ -873,7 +873,7 @@ public sealed class NgramHashingEstimator : IEstimator /// /// Describes how the transformer handles one pair of mulitple inputs - singular output columns. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; @@ -922,7 +922,7 @@ public sealed class ColumnInfo /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. /// Whether to rehash unigrams. - public ColumnInfo(string name, + public ColumnOptions(string name, string[] inputColumnNames, int ngramLength = NgramHashingEstimator.Defaults.NgramLength, int skipLength = NgramHashingEstimator.Defaults.SkipLength, @@ -962,7 +962,7 @@ public ColumnInfo(string name, RehashUnigrams = rehashUnigrams; } - internal ColumnInfo(ModelLoadContext ctx) + internal ColumnOptions(ModelLoadContext ctx) { Contracts.AssertValue(ctx); @@ -995,7 +995,7 @@ internal ColumnInfo(ModelLoadContext ctx) AllLengths = ctx.Reader.ReadBoolByte(); } - internal ColumnInfo(ModelLoadContext ctx, string name, string[] inputColumnNames) + internal ColumnOptions(ModelLoadContext ctx, string name, string[] inputColumnNames) { Contracts.AssertValue(ctx); Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames)); @@ -1073,7 +1073,7 @@ internal static class Defaults } private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; /// /// Produces a bag of counts of hashed ngrams in @@ -1171,7 +1171,7 @@ internal NgramHashingEstimator(IHostEnvironment env, uint seed = 314489979, bool ordered = true, int invertHash = 0) - : this(env, columns.Select(x => new ColumnInfo(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, hashBits, seed, ordered, invertHash)).ToArray()) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, hashBits, seed, ordered, invertHash)).ToArray()) { } @@ -1185,7 +1185,7 @@ internal NgramHashingEstimator(IHostEnvironment env, /// /// The environment. /// Array of columns which specifies the behavior of the transformation. 
- internal NgramHashingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal NgramHashingEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(NgramHashingEstimator)); diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 65095bb427..513a00884f 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -131,7 +131,7 @@ private sealed class TransformInfo public bool RequireIdf => Weighting == NgramExtractingEstimator.WeightingCriteria.Idf || Weighting == NgramExtractingEstimator.WeightingCriteria.TfIdf; - public TransformInfo(NgramExtractingEstimator.ColumnInfo info) + public TransformInfo(NgramExtractingEstimator.ColumnOptions info) { NgramLength = info.NgramLength; SkipLength = info.SkipLength; @@ -191,7 +191,7 @@ internal void Save(ModelSaveContext ctx) // Ngram inverse document frequencies private readonly double[][] _invDocFreqs; - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(NgramExtractingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(NgramExtractingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -204,7 +204,7 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].inputColumnName, NgramExtractingEstimator.ExpectedColumnType, type.ToString()); } - internal NgramExtractingTransformer(IHostEnvironment env, IDataView input, NgramExtractingEstimator.ColumnInfo[] columns) + internal NgramExtractingTransformer(IHostEnvironment env, IDataView input, NgramExtractingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(NgramExtractingTransformer)), GetColumnPairs(columns)) { var transformInfos = new TransformInfo[columns.Length]; @@ -218,7 +218,7 @@ internal NgramExtractingTransformer(IHostEnvironment env, IDataView input, Ngram _ngramMaps = Train(Host, columns, _transformInfos, input, out _invDocFreqs); } - private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimator.ColumnInfo[] columns, ImmutableArray transformInfos, IDataView trainingData, out double[][] invDocFreqs) + private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimator.ColumnOptions[] columns, ImmutableArray transformInfos, IDataView trainingData, out double[][] invDocFreqs) { var helpers = new NgramBufferBuilder[columns.Length]; var getters = new ValueGetter>[columns.Length]; @@ -413,7 +413,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new NgramExtractingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new NgramExtractingEstimator.ColumnOptions[options.Columns.Length]; using (var ch = env.Start("ValidateArgs")) { @@ -421,7 +421,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa { var item = options.Columns[i]; var maxNumTerms = Utils.Size(item.MaxNumTerms) > 0 ? 
item.MaxNumTerms : options.MaxNumTerms; - cols[i] = new NgramExtractingEstimator.ColumnInfo( + cols[i] = new NgramExtractingEstimator.ColumnOptions( item.Name, item.NgramLength ?? options.NgramLength, item.SkipLength ?? options.SkipLength, @@ -701,7 +701,7 @@ internal static class Defaults } private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in @@ -744,7 +744,7 @@ internal NgramExtractingEstimator(IHostEnvironment env, bool allLengths = Defaults.AllLengths, int maxNumTerms = Defaults.MaxNumTerms, WeightingCriteria weighting = Defaults.Weighting) - : this(env, columns.Select(x => new ColumnInfo(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maxNumTerms)).ToArray()) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maxNumTerms)).ToArray()) { } @@ -754,7 +754,7 @@ internal NgramExtractingEstimator(IHostEnvironment env, /// /// The environment. /// Array of columns with information how to transform data. - internal NgramExtractingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal NgramExtractingEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(NgramExtractingEstimator)); @@ -795,7 +795,7 @@ internal static bool IsSchemaColumnValid(SchemaShape.Column col) /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; @@ -825,7 +825,7 @@ public sealed class ColumnInfo /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. /// The weighting criteria. /// Maximum number of ngrams to store in the dictionary. - public ColumnInfo(string name, string inputColumnName = null, + public ColumnOptions(string name, string inputColumnName = null, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, bool allLengths = Defaults.AllLengths, @@ -835,7 +835,7 @@ public ColumnInfo(string name, string inputColumnName = null, { } - internal ColumnInfo(string name, + internal ColumnOptions(string name, int ngramLength, int skipLength, bool allLengths, diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs index a14c455dce..43a27c0d9f 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs @@ -57,7 +57,7 @@ internal sealed class PredefinedStopWordsRemoverFactory : IStopWordsRemoverFacto { public IDataTransform CreateComponent(IHostEnvironment env, IDataView input, OneToOneColumn[] columns) { - return new StopWordsRemovingEstimator(env, columns.Select(x => new StopWordsRemovingEstimator.ColumnInfo(x.Name, x.Source)).ToArray()).Fit(input).Transform(input) as IDataTransform; + return new StopWordsRemovingEstimator(env, columns.Select(x => new StopWordsRemovingEstimator.ColumnOptions(x.Name, x.Source)).ToArray()).Fit(input).Transform(input) as IDataTransform; } } @@ -133,9 +133,9 @@ private static VersionInfo GetVersionInfo() /// /// Defines the behavior of the transformer. 
/// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + public IReadOnlyCollection Columns => _columns.AsReadOnly(); - private readonly StopWordsRemovingEstimator.ColumnInfo[] _columns; + private readonly StopWordsRemovingEstimator.ColumnOptions[] _columns; private static volatile NormStr.Pool[] _stopWords; private static volatile Dictionary, StopWordsRemovingEstimator.Language> _langsDictionary; @@ -181,7 +181,7 @@ private static NormStr.Pool[] StopWords } } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(StopWordsRemovingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(StopWordsRemovingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -199,7 +199,7 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int /// /// The environment. /// Pairs of columns to remove stop words from. - internal StopWordsRemovingTransformer(IHostEnvironment env, params StopWordsRemovingEstimator.ColumnInfo[] columns) : + internal StopWordsRemovingTransformer(IHostEnvironment env, params StopWordsRemovingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { _columns = columns; @@ -233,13 +233,13 @@ private StopWordsRemovingTransformer(IHost host, ModelLoadContext ctx) : // foreach column: // int: the stopwords list language // string: the id of languages column name - _columns = new StopWordsRemovingEstimator.ColumnInfo[columnsLength]; + _columns = new StopWordsRemovingEstimator.ColumnOptions[columnsLength]; for (int i = 0; i < columnsLength; i++) { var lang = (StopWordsRemovingEstimator.Language)ctx.Reader.ReadInt32(); Contracts.CheckDecode(Enum.IsDefined(typeof(StopWordsRemovingEstimator.Language), lang)); var langColName = ctx.LoadStringOrNull(); - _columns[i] = new StopWordsRemovingEstimator.ColumnInfo(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, lang, langColName); + _columns[i] = new StopWordsRemovingEstimator.ColumnOptions(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, lang, langColName); } } @@ -261,11 +261,11 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new StopWordsRemovingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new StopWordsRemovingEstimator.ColumnOptions[options.Columns.Length]; for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new StopWordsRemovingEstimator.ColumnInfo( + cols[i] = new StopWordsRemovingEstimator.ColumnOptions( item.Name, item.Source ?? item.Name, item.Language ?? options.Language, @@ -493,7 +493,7 @@ public sealed class StopWordsRemovingEstimator : TrivialEstimator /// Describes how the transformer handles one column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; @@ -511,7 +511,7 @@ public sealed class ColumnInfo /// Name of the column to transform. If set to , the value of the will be used as source. /// Language-specific stop words list. /// Optional column to use for languages. This overrides language value. 
- public ColumnInfo(string name, + public ColumnOptions(string name, string inputColumnName = null, Language language = Defaults.DefaultLanguage, string languageColumn = null) @@ -581,11 +581,11 @@ internal StopWordsRemovingEstimator(IHostEnvironment env, string outputColumnNam /// Pairs of columns to remove stop words on. /// Langauge of the input text columns . internal StopWordsRemovingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, Language language = Language.English) - : this(env, columns.Select(x => new ColumnInfo(x.outputColumnName, x.inputColumnName, language)).ToArray()) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, language)).ToArray()) { } - internal StopWordsRemovingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal StopWordsRemovingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(StopWordsRemovingEstimator)), new StopWordsRemovingTransformer(env, columns)) { } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index b265de0634..3117f1199c 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -73,8 +73,8 @@ public static TokenizingByCharactersEstimator TokenizeCharacters(this Transforms public static TokenizingByCharactersEstimator TokenizeCharacters(this TransformsCatalog.TextTransforms catalog, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters, - params SimpleColumnInfo[] columns) - => new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, SimpleColumnInfo.ConvertToValueTuples(columns)); + params ColumnOptions[] columns) + => new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns)); /// /// Normalizes incoming text in by changing case, removing diacritical marks, punctuation marks and/or numbers @@ -147,7 +147,7 @@ public static WordEmbeddingsExtractingEstimator ExtractWordEmbeddings(this Trans /// public static WordEmbeddingsExtractingEstimator ExtractWordEmbeddings(this TransformsCatalog.TextTransforms catalog, WordEmbeddingsExtractingEstimator.PretrainedModelKind modelKind = WordEmbeddingsExtractingEstimator.PretrainedModelKind.Sswe, - params WordEmbeddingsExtractingEstimator.ColumnInfo[] columns) + params WordEmbeddingsExtractingEstimator.ColumnOptions[] columns) => new WordEmbeddingsExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), modelKind, columns); /// @@ -181,7 +181,7 @@ public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextT /// The text-related transform's catalog. /// Pairs of columns to run the tokenization on. public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextTransforms catalog, - params WordTokenizingEstimator.ColumnInfo[] columns) + params WordTokenizingEstimator.ColumnOptions[] columns) => new WordTokenizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); /// @@ -242,7 +242,7 @@ public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Text /// The text-related transform's catalog. /// Pairs of columns to run the ngram process on. 
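                // Illustrative usage sketch only, not part of this change: the ProduceNgrams extension below
                // taking the renamed NgramExtractingEstimator.ColumnOptions. It assumes an MLContext instance
                // named mlContext and a placeholder input column "TokensAsKeys" holding tokens already mapped to keys.
                var ngramPipeline = mlContext.Transforms.Text.ProduceNgrams(
                    new NgramExtractingEstimator.ColumnOptions("Ngrams", "TokensAsKeys", ngramLength: 2, allLengths: true));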
public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.TextTransforms catalog, - params NgramExtractingEstimator.ColumnInfo[] columns) + params NgramExtractingEstimator.ColumnOptions[] columns) => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); /// @@ -623,7 +623,7 @@ public static LatentDirichletAllocationEstimator LatentDirichletAllocation(this /// Describes the parameters of LDA for each column pair. public static LatentDirichletAllocationEstimator LatentDirichletAllocation( this TransformsCatalog.TextTransforms catalog, - params LatentDirichletAllocationEstimator.ColumnInfo[] columns) + params LatentDirichletAllocationEstimator.ColumnOptions[] columns) => new LatentDirichletAllocationEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs index 5b66e33efa..523733f911 100644 --- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs +++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs @@ -362,11 +362,11 @@ public ITransformer Fit(IDataView input) if (tparams.NeedsWordTokenizationTransform) { - var xfCols = new WordTokenizingEstimator.ColumnInfo[textCols.Length]; + var xfCols = new WordTokenizingEstimator.ColumnOptions[textCols.Length]; wordTokCols = new string[textCols.Length]; for (int i = 0; i < textCols.Length; i++) { - var col = new WordTokenizingEstimator.ColumnInfo(GenerateColumnName(view.Schema, textCols[i], "WordTokenizer"), textCols[i]); + var col = new WordTokenizingEstimator.ColumnOptions(GenerateColumnName(view.Schema, textCols[i], "WordTokenizer"), textCols[i]); xfCols[i] = col; wordTokCols[i] = col.Name; tempCols.Add(col.Name); @@ -378,12 +378,12 @@ public ITransformer Fit(IDataView input) if (tparams.UsePredefinedStopWordRemover) { Contracts.Assert(wordTokCols != null, "StopWords transform requires that word tokenization has been applied to the input text."); - var xfCols = new StopWordsRemovingEstimator.ColumnInfo[wordTokCols.Length]; + var xfCols = new StopWordsRemovingEstimator.ColumnOptions[wordTokCols.Length]; var dstCols = new string[wordTokCols.Length]; for (int i = 0; i < wordTokCols.Length; i++) { var tempName = GenerateColumnName(view.Schema, wordTokCols[i], "StopWordsRemoverTransform"); - var col = new StopWordsRemovingEstimator.ColumnInfo(tempName, wordTokCols[i], tparams.StopwordsLanguage); + var col = new StopWordsRemovingEstimator.ColumnOptions(tempName, wordTokCols[i], tparams.StopwordsLanguage); dstCols[i] = tempName; tempCols.Add(tempName); @@ -443,13 +443,13 @@ public ITransformer Fit(IDataView input) if (tparams.VectorNormalizer != TextNormKind.None) { - var xfCols = new List(2); + var xfCols = new List(2); if (charFeatureCol != null) { var dstCol = GenerateColumnName(view.Schema, charFeatureCol, "LpCharNorm"); tempCols.Add(dstCol); - xfCols.Add(new LpNormalizingEstimator.LpNormColumnInfo(dstCol, charFeatureCol, normalizerKind: tparams.LpNormalizerKind)); + xfCols.Add(new LpNormalizingEstimator.LpNormColumnOptions(dstCol, charFeatureCol, normalizerKind: tparams.LpNormalizerKind)); charFeatureCol = dstCol; } @@ -457,7 +457,7 @@ public ITransformer Fit(IDataView input) { var dstCol = GenerateColumnName(view.Schema, wordFeatureCol, "LpWordNorm"); tempCols.Add(dstCol); - xfCols.Add(new LpNormalizingEstimator.LpNormColumnInfo(dstCol, wordFeatureCol, normalizerKind: tparams.LpNormalizerKind)); + xfCols.Add(new 
LpNormalizingEstimator.LpNormColumnOptions(dstCol, wordFeatureCol, normalizerKind: tparams.LpNormalizerKind)); wordFeatureCol = dstCol; } @@ -487,7 +487,7 @@ public ITransformer Fit(IDataView input) } if (srcTaggedCols.Count > 0) { - view = new ColumnConcatenatingTransformer(h, new ColumnConcatenatingTransformer.ColumnInfo(OutputColumn, + view = new ColumnConcatenatingTransformer(h, new ColumnConcatenatingTransformer.ColumnOptions(OutputColumn, srcTaggedCols.Select(kvp => (kvp.Value, kvp.Key)))) .Transform(view); } diff --git a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs index b217c32173..b0e94b26fc 100644 --- a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs @@ -115,7 +115,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa // REVIEW: In order to make it possible to output separate bags for different columns // using the same dictionary, we need to find a way to make ConcatTransform remember the boundaries. - var tokenizeColumns = new WordTokenizingEstimator.ColumnInfo[options.Columns.Length]; + var tokenizeColumns = new WordTokenizingEstimator.ColumnOptions[options.Columns.Length]; var extractorArgs = new NgramExtractorTransform.Options() @@ -135,7 +135,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa h.CheckUserArg(Utils.Size(column.Source) > 0, nameof(column.Source)); h.CheckUserArg(column.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(column.Source)); - tokenizeColumns[iinfo] = new WordTokenizingEstimator.ColumnInfo(column.Name, column.Source.Length > 1 ? column.Name : column.Source[0]); + tokenizeColumns[iinfo] = new WordTokenizingEstimator.ColumnOptions(column.Name, column.Source.Length > 1 ? column.Name : column.Source[0]); extractorArgs.Columns[iinfo] = new NgramExtractorTransform.Column() @@ -340,11 +340,11 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa view = new MissingValueDroppingTransformer(h, missingDropColumns.Select(x => (x, x)).ToArray()).Transform(view); } - var ngramColumns = new NgramExtractingEstimator.ColumnInfo[options.Columns.Length]; + var ngramColumns = new NgramExtractingEstimator.ColumnOptions[options.Columns.Length]; for (int iinfo = 0; iinfo < options.Columns.Length; iinfo++) { var column = options.Columns[iinfo]; - ngramColumns[iinfo] = new NgramExtractingEstimator.ColumnInfo(column.Name, + ngramColumns[iinfo] = new NgramExtractingEstimator.ColumnOptions(column.Name, column.NgramLength ?? options.NgramLength, column.SkipLength ?? options.SkipLength, column.AllLengths ?? 
options.AllLengths, @@ -505,7 +505,7 @@ public static IDataView ApplyConcatOnSources(IHostEnvironment env, ManyToOneColu env.CheckValue(input, nameof(input)); IDataView view = input; - var concatColumns = new List(); + var concatColumns = new List(); foreach (var col in columns) { env.CheckUserArg(col != null, nameof(WordBagBuildingTransformer.Options.Columns)); @@ -513,7 +513,7 @@ public static IDataView ApplyConcatOnSources(IHostEnvironment env, ManyToOneColu env.CheckUserArg(Utils.Size(col.Source) > 0, nameof(col.Source)); env.CheckUserArg(col.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(col.Source)); if (col.Source.Length > 1) - concatColumns.Add(new ColumnConcatenatingTransformer.ColumnInfo(col.Name, col.Source)); + concatColumns.Add(new ColumnConcatenatingTransformer.ColumnOptions(col.Name, col.Source)); } if (concatColumns.Count > 0) return new ColumnConcatenatingTransformer(env, concatColumns.ToArray()).Transform(view); diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs index 584d7b73ba..01f5e055d0 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs @@ -164,7 +164,7 @@ public List GetWordLabels() /// The pretrained word embedding model. internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, string outputColumnName, string inputColumnName = null, WordEmbeddingsExtractingEstimator.PretrainedModelKind modelKind = WordEmbeddingsExtractingEstimator.PretrainedModelKind.Sswe) - : this(env, modelKind, new WordEmbeddingsExtractingEstimator.ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName)) + : this(env, modelKind, new WordEmbeddingsExtractingEstimator.ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName)) { } @@ -176,7 +176,7 @@ internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, string output /// Filename for custom word embedding model. /// Name of the column to transform. If set to , the value of the will be used as source. internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, string outputColumnName, string customModelFile, string inputColumnName = null) - : this(env, customModelFile, new WordEmbeddingsExtractingEstimator.ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName)) + : this(env, customModelFile, new WordEmbeddingsExtractingEstimator.ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName)) { } @@ -186,7 +186,7 @@ internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, string output /// Host Environment. /// The pretrained word embedding model. /// Input/Output columns. - internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, WordEmbeddingsExtractingEstimator.PretrainedModelKind modelKind, params WordEmbeddingsExtractingEstimator.ColumnInfo[] columns) + internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, WordEmbeddingsExtractingEstimator.PretrainedModelKind modelKind, params WordEmbeddingsExtractingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { env.CheckUserArg(Enum.IsDefined(typeof(WordEmbeddingsExtractingEstimator.PretrainedModelKind), modelKind), nameof(modelKind)); @@ -202,7 +202,7 @@ internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, WordEmbedding /// Host Environment. /// Filename for custom word embedding model. /// Input/Output columns. 
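For orientation (this note and the snippet are not part of the patch): the concatenation options type follows the same rename. A sketch assuming an MLContext named mlContext and placeholder column names; the ColumnConcatenatingTransformer constructor exercised in the tests later in this patch may only be reachable from the test assemblies, so the public Concatenate catalog call is shown alongside the options form.

    // Per-column options form, matching the ApplyConcatOnSources call path above (placeholder names).
    var concatColumn = new ColumnConcatenatingTransformer.ColumnOptions("Features", new[] { "F1", "F2" });

    // Typical user code reaches the same transform through the catalog instead:
    var concatPipeline = mlContext.Transforms.Concatenate("Features", "F1", "F2");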
- internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, string customModelFile, params WordEmbeddingsExtractingEstimator.ColumnInfo[] columns) + internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, string customModelFile, params WordEmbeddingsExtractingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { env.CheckValue(customModelFile, nameof(customModelFile)); @@ -214,7 +214,7 @@ internal WordEmbeddingsExtractingTransformer(IHostEnvironment env, string custom _currentVocab = GetVocabularyDictionary(env); } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(WordEmbeddingsExtractingEstimator.ColumnInfo[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(WordEmbeddingsExtractingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -233,11 +233,11 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new WordEmbeddingsExtractingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new WordEmbeddingsExtractingEstimator.ColumnOptions[options.Columns.Length]; for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new WordEmbeddingsExtractingEstimator.ColumnInfo( + cols[i] = new WordEmbeddingsExtractingEstimator.ColumnOptions( item.Name, item.Source ?? item.Name); } @@ -739,7 +739,7 @@ private static ParallelOptions GetParallelOptions(IHostEnvironment hostEnvironme public sealed class WordEmbeddingsExtractingEstimator : IEstimator { private readonly IHost _host; - private readonly ColumnInfo[] _columns; + private readonly ColumnOptions[] _columns; private readonly PretrainedModelKind? _modelKind; private readonly string _customLookupTable; @@ -755,7 +755,7 @@ public sealed class WordEmbeddingsExtractingEstimator : IEstimatorThe embeddings to use. internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, PretrainedModelKind modelKind = PretrainedModelKind.Sswe) - : this(env, modelKind, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName)) + : this(env, modelKind, new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName)) { } @@ -770,7 +770,7 @@ internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, string outputCo /// The path of the pre-trained embeedings model to use. /// Name of the column to transform. internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, string outputColumnName, string customModelFile, string inputColumnName = null) - : this(env, customModelFile, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName)) + : this(env, customModelFile, new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName)) { } @@ -785,7 +785,7 @@ internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, string outputCo /// The array columns, and per-column configurations to extract embeedings from. 
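As a usage sketch (not part of the patch): the public ExtractWordEmbeddings catalog extension shown earlier now takes the renamed WordEmbeddingsExtractingEstimator.ColumnOptions. The mlContext variable and the "Embeddings"/"Tokens" column names are placeholders.

    // Extract sentiment-specific word embeddings (Sswe) for a tokenized text column.
    var embeddingPipeline = mlContext.Transforms.Text.ExtractWordEmbeddings(
        WordEmbeddingsExtractingEstimator.PretrainedModelKind.Sswe,
        new WordEmbeddingsExtractingEstimator.ColumnOptions("Embeddings", "Tokens"));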
internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, PretrainedModelKind modelKind = PretrainedModelKind.Sswe, - params ColumnInfo[] columns) + params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(WordEmbeddingsExtractingEstimator)); @@ -794,7 +794,7 @@ internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, _columns = columns; } - internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, string customModelFile, params ColumnInfo[] columns) + internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, string customModelFile, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(WordEmbeddingsExtractingEstimator)); @@ -841,7 +841,7 @@ public enum PretrainedModelKind /// /// Information for each column pair. /// - public sealed class ColumnInfo + public sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . @@ -858,7 +858,7 @@ public sealed class ColumnInfo /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to will be used as source. - public ColumnInfo(string name, string inputColumnName = null) + public ColumnOptions(string name, string inputColumnName = null) { Contracts.CheckNonEmpty(name, nameof(name)); diff --git a/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs b/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs index eb387521a1..b10c9f91da 100644 --- a/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs @@ -103,7 +103,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa var uniqueSourceNames = NgramExtractionUtils.GenerateUniqueSourceNames(h, options.Columns, view.Schema); Contracts.Assert(uniqueSourceNames.Length == options.Columns.Length); - var tokenizeColumns = new List(); + var tokenizeColumns = new List(); var extractorCols = new NgramHashExtractingTransformer.Column[options.Columns.Length]; var colCount = options.Columns.Length; List tmpColNames = new List(); @@ -114,7 +114,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa var curTmpNames = new string[srcCount]; Contracts.Assert(uniqueSourceNames[iinfo].Length == options.Columns[iinfo].Source.Length); for (int isrc = 0; isrc < srcCount; isrc++) - tokenizeColumns.Add(new WordTokenizingEstimator.ColumnInfo(curTmpNames[isrc] = uniqueSourceNames[iinfo][isrc], options.Columns[iinfo].Source[isrc])); + tokenizeColumns.Add(new WordTokenizingEstimator.ColumnOptions(curTmpNames[isrc] = uniqueSourceNames[iinfo][isrc], options.Columns[iinfo].Source[isrc])); tmpColNames.AddRange(curTmpNames); extractorCols[iinfo] = @@ -331,8 +331,8 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa if (termLoaderArgs != null) termCols = new List(); - var hashColumns = new List(); - var ngramHashColumns = new NgramHashingEstimator.ColumnInfo[options.Columns.Length]; + var hashColumns = new List(); + var ngramHashColumns = new NgramHashingEstimator.ColumnOptions[options.Columns.Length]; var colCount = options.Columns.Length; // The NGramHashExtractor has a ManyToOne column type. To avoid stepping over the source @@ -361,12 +361,12 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa }); } - hashColumns.Add(new HashingEstimator.ColumnInfo(tmpName, termLoaderArgs == null ? 
column.Source[isrc] : tmpName, + hashColumns.Add(new HashingEstimator.ColumnOptions(tmpName, termLoaderArgs == null ? column.Source[isrc] : tmpName, 30, column.Seed ?? options.Seed, false, column.InvertHash ?? options.InvertHash)); } ngramHashColumns[iinfo] = - new NgramHashingEstimator.ColumnInfo(column.Name, tmpColNames[iinfo], + new NgramHashingEstimator.ColumnOptions(column.Name, tmpColNames[iinfo], column.NgramLength ?? options.NgramLength, column.SkipLength ?? options.SkipLength, column.AllLengths ?? options.AllLengths, diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs index 6bb47ce1bb..87a6cb866d 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs @@ -106,16 +106,16 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "DelimitedTokenize"; - public IReadOnlyCollection Columns => _columns.AsReadOnly(); - private readonly WordTokenizingEstimator.ColumnInfo[] _columns; + public IReadOnlyCollection Columns => _columns.AsReadOnly(); + private readonly WordTokenizingEstimator.ColumnOptions[] _columns; - private static (string name, string inputColumnName)[] GetColumnPairs(WordTokenizingEstimator.ColumnInfo[] columns) + private static (string name, string inputColumnName)[] GetColumnPairs(WordTokenizingEstimator.ColumnOptions[] columns) { Contracts.CheckNonEmpty(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); } - internal WordTokenizingTransformer(IHostEnvironment env, params WordTokenizingEstimator.ColumnInfo[] columns) : + internal WordTokenizingTransformer(IHostEnvironment env, params WordTokenizingEstimator.ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns)) { _columns = columns.ToArray(); @@ -132,7 +132,7 @@ private WordTokenizingTransformer(IHost host, ModelLoadContext ctx) : base(host, ctx) { var columnsLength = ColumnPairs.Length; - _columns = new WordTokenizingEstimator.ColumnInfo[columnsLength]; + _columns = new WordTokenizingEstimator.ColumnOptions[columnsLength]; // *** Binary format *** // // for each added column @@ -141,7 +141,7 @@ private WordTokenizingTransformer(IHost host, ModelLoadContext ctx) : { var separators = ctx.Reader.ReadCharArray(); Contracts.CheckDecode(Utils.Size(separators) > 0); - _columns[i] = new WordTokenizingEstimator.ColumnInfo(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, separators); + _columns[i] = new WordTokenizingEstimator.ColumnOptions(ColumnPairs[i].outputColumnName, ColumnPairs[i].inputColumnName, separators); } } @@ -182,12 +182,12 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new WordTokenizingEstimator.ColumnInfo[options.Columns.Length]; + var cols = new WordTokenizingEstimator.ColumnOptions[options.Columns.Length]; for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; var separators = options.CharArrayTermSeparators ?? PredictionUtil.SeparatorFromString(item.TermSeparators ?? options.TermSeparators); - cols[i] = new WordTokenizingEstimator.ColumnInfo(item.Name, item.Source ?? item.Name, separators); + cols[i] = new WordTokenizingEstimator.ColumnOptions(item.Name, item.Source ?? 
item.Name, separators); } return new WordTokenizingTransformer(env, cols).MakeDataTransform(input); @@ -427,7 +427,7 @@ internal WordTokenizingEstimator(IHostEnvironment env, string outputColumnName, /// Pairs of columns to run the tokenization on. /// The separators to use (uses space character by default). internal WordTokenizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, char[] separators = null) - : this(env, columns.Select(x => new ColumnInfo(x.outputColumnName, x.inputColumnName, separators)).ToArray()) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, separators)).ToArray()) { } @@ -436,11 +436,11 @@ internal WordTokenizingEstimator(IHostEnvironment env, (string outputColumnName, /// /// The environment. /// Pairs of columns to run the tokenization on. - internal WordTokenizingEstimator(IHostEnvironment env, params ColumnInfo[] columns) + internal WordTokenizingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(WordTokenizingEstimator)), new WordTokenizingTransformer(env, columns)) { } - public sealed class ColumnInfo + public sealed class ColumnOptions { public readonly string Name; public readonly string InputColumnName; @@ -452,7 +452,7 @@ public sealed class ColumnInfo /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. /// Casing text using the rules of the invariant culture. If not specified, space will be used as separator. - public ColumnInfo(string name, string inputColumnName = null, char[] separators = null) + public ColumnOptions(string name, string inputColumnName = null, char[] separators = null) { Name = name; InputColumnName = inputColumnName ?? name; diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index 6f397bf981..fedb248f7b 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -218,14 +218,14 @@ private static bool ShouldPreserveMetadata(string kind) } } - public readonly struct PivotColumnInfo + public readonly struct PivotColumnOptions { public readonly string Name; public readonly int Index; public readonly int Size; public readonly PrimitiveDataViewType ItemType; - public PivotColumnInfo(string name, int index, int size, PrimitiveDataViewType itemType) + public PivotColumnOptions(string name, int index, int size, PrimitiveDataViewType itemType) { Contracts.AssertNonEmpty(name); Contracts.Assert(index >= 0); @@ -244,7 +244,7 @@ public PivotColumnInfo(string name, int index, int size, PrimitiveDataViewType i /// /// Information of columns to be ungrouped in . 
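A brief sketch, not part of the patch: the WordTokenizingEstimator.ColumnOptions constructor shown above (name, inputColumnName, separators) plugs into the public TokenizeWords catalog call from earlier in this patch. Column names and the mlContext variable are placeholders.

    // Tokenize "Text" into "Tokens", splitting on space and comma.
    var tokenizePipeline = mlContext.Transforms.Text.TokenizeWords(
        new WordTokenizingEstimator.ColumnOptions("Tokens", "Text", separators: new[] { ' ', ',' }));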
/// - private readonly PivotColumnInfo[] _infos; + private readonly PivotColumnOptions[] _infos; /// /// [i] is -1 means that the i-th column in both of and @@ -319,13 +319,13 @@ public UngroupBinding(IExceptionContext ectx, DataViewSchema inputSchema, Ungrou } private static void Bind(IExceptionContext ectx, DataViewSchema inputSchema, - string[] pivotColumns, out PivotColumnInfo[] infos) + string[] pivotColumns, out PivotColumnOptions[] infos) { Contracts.AssertValueOrNull(ectx); ectx.AssertValue(inputSchema); ectx.AssertNonEmpty(pivotColumns); - infos = new PivotColumnInfo[pivotColumns.Length]; + infos = new PivotColumnOptions[pivotColumns.Length]; for (int i = 0; i < pivotColumns.Length; i++) { var name = pivotColumns[i]; @@ -337,7 +337,7 @@ private static void Bind(IExceptionContext ectx, DataViewSchema inputSchema, if (!(inputSchema[col].Type is VectorType colType)) throw ectx.ExceptUserArg(nameof(Options.Columns), "Pivot column '{0}' has type '{1}', but must be a vector of primitive types", name, inputSchema[col].Type); - infos[i] = new PivotColumnInfo(name, col, colType.Size, colType.ItemType); + infos[i] = new PivotColumnOptions(name, col, colType.Size, colType.ItemType); } } @@ -399,13 +399,13 @@ public int PivotColumnCount get { return _infos.Length; } } - public PivotColumnInfo GetPivotColumnInfo(int iinfo) + public PivotColumnOptions GetPivotColumnOptions(int iinfo) { _ectx.Assert(0 <= iinfo && iinfo < _infos.Length); return _infos[iinfo]; } - public PivotColumnInfo GetPivotColumnInfoByCol(int col) + public PivotColumnOptions GetPivotColumnOptionsByCol(int col) { _ectx.Assert(0 <= col && col < _inputSchema.Count); _ectx.Assert(_pivotIndex[col] >= 0); @@ -487,7 +487,7 @@ public Cursor(IChannelProvider provider, DataViewRowCursor input, UngroupBinding var needed = new List>(); for (int i = 0; i < sizeColumnsLim; i++) { - var info = _ungroupBinding.GetPivotColumnInfo(i); + var info = _ungroupBinding.GetPivotColumnOptions(i); if (info.Size > 0) { if (_fixedSize == 0) @@ -615,7 +615,7 @@ public override ValueGetter GetGetter(int col) return Input.GetGetter(col); if (_cachedGetters[col] == null) - _cachedGetters[col] = MakeGetter(col, _ungroupBinding.GetPivotColumnInfoByCol(col).ItemType); + _cachedGetters[col] = MakeGetter(col, _ungroupBinding.GetPivotColumnOptionsByCol(col).ItemType); var result = _cachedGetters[col] as ValueGetter; Ch.Check(result != null, "Unexpected getter type requested"); diff --git a/test/Microsoft.ML.Benchmarks/HashBench.cs b/test/Microsoft.ML.Benchmarks/HashBench.cs index fb36643089..6372fa862a 100644 --- a/test/Microsoft.ML.Benchmarks/HashBench.cs +++ b/test/Microsoft.ML.Benchmarks/HashBench.cs @@ -73,7 +73,7 @@ private void InitMap(T val, DataViewType type, int hashBits = 20, ValueGetter getter = (ref T dst) => dst = val; _inRow = RowImpl.Create(type, getter); // One million features is a nice, typical number. 
- var info = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: hashBits); + var info = new HashingEstimator.ColumnOptions("Bar", "Foo", hashBits: hashBits); var xf = new HashingTransformer(_env, new[] { info }); var mapper = ((ITransformer)xf).GetRowToRowMapper(_inRow.Schema); var column = mapper.OutputSchema["Bar"]; diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index b01c4422fe..2bccf68ed2 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -740,8 +740,8 @@ public void EntryPointPipelineEnsemble() { var data = splitOutput.TrainData[i]; data = new RandomFourierFeaturizingEstimator(Env, new[] { - new RandomFourierFeaturizingEstimator.ColumnInfo("Features1", 10, false, "Features"), - new RandomFourierFeaturizingEstimator.ColumnInfo("Features2", 10, false, "Features"), + new RandomFourierFeaturizingEstimator.ColumnOptions("Features1", 10, false, "Features"), + new RandomFourierFeaturizingEstimator.ColumnOptions("Features2", 10, false, "Features"), }).Fit(data).Transform(data); data = new ColumnConcatenatingTransformer(Env, "Features", new[] { "Features1", "Features2" }).Transform(data); @@ -1192,8 +1192,8 @@ public void EntryPointMulticlassPipelineEnsemble() { var data = splitOutput.TrainData[i]; data = new RandomFourierFeaturizingEstimator(Env, new[] { - new RandomFourierFeaturizingEstimator.ColumnInfo("Features1", 10, false, "Features"), - new RandomFourierFeaturizingEstimator.ColumnInfo("Features2", 10, false, "Features"), + new RandomFourierFeaturizingEstimator.ColumnOptions("Features1", 10, false, "Features"), + new RandomFourierFeaturizingEstimator.ColumnOptions("Features2", 10, false, "Features"), }).Fit(data).Transform(data); data = new ColumnConcatenatingTransformer(Env, "Features", new[] { "Features1", "Features2" }).Transform(data); @@ -1338,7 +1338,7 @@ public void EntryPointPipelineEnsembleGetSummary() { var data = splitOutput.TrainData[i]; data = new OneHotEncodingEstimator(Env, "Cat").Fit(data).Transform(data); - data = new ColumnConcatenatingTransformer(Env, new ColumnConcatenatingTransformer.ColumnInfo("Features", i % 2 == 0 ? new[] { "Features", "Cat" } : new[] { "Cat", "Features" })).Transform(data); + data = new ColumnConcatenatingTransformer(Env, new ColumnConcatenatingTransformer.ColumnOptions("Features", i % 2 == 0 ? 
new[] { "Features", "Cat" } : new[] { "Cat", "Features" })).Transform(data); if (i % 2 == 0) { var lrInput = new LogisticRegression.Options diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs index ffe85c392a..0fe181f0ae 100644 --- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs +++ b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs @@ -1104,7 +1104,7 @@ private void TestHashTransformHelper(T[] data, uint[] results, NumberDataView builder.AddColumn("F1", type, data); var srcView = builder.GetDataView(); - var hashTransform = new HashingTransformer(Env, new HashingEstimator.ColumnInfo("F1", "F1", 5, 42)).Transform(srcView); + var hashTransform = new HashingTransformer(Env, new HashingEstimator.ColumnOptions("F1", "F1", 5, 42)).Transform(srcView); using (var cursor = hashTransform.GetRowCursorForAllColumns()) { var resultGetter = cursor.GetGetter(1); @@ -1135,7 +1135,7 @@ private void TestHashTransformVectorHelper(VBuffer data, uint[][] results, private void TestHashTransformVectorHelper(ArrayDataViewBuilder builder, uint[][] results) { var srcView = builder.GetDataView(); - var hashTransform = new HashingTransformer(Env, new HashingEstimator.ColumnInfo("F1V", "F1V", 5, 42)).Transform(srcView); + var hashTransform = new HashingTransformer(Env, new HashingEstimator.ColumnOptions("F1V", "F1V", 5, 42)).Transform(srcView); using (var cursor = hashTransform.GetRowCursorForAllColumns()) { var resultGetter = cursor.GetGetter>(1); diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index bc96e29c13..308aafeda1 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -211,7 +211,7 @@ public void KeyToVectorWithBagOnnxConversionTest() hasHeader: true); var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.Categorical.OneHotEncodingTransformer.OutputKind.Bag) - .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnInfo("F2"))) + .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnOptions("F2"))) .Append(mlContext.Transforms.Concatenate("Features", "F1", "F2")) .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numLeaves: 2, numTrees: 1, minDatapointsInLeaves: 2)); @@ -405,7 +405,7 @@ public void RemoveVariablesInPipelineTest() hasHeader: true); var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.Categorical.OneHotEncodingTransformer.OutputKind.Bag) - .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnInfo("F2"))) + .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnOptions("F2"))) .Append(mlContext.Transforms.Concatenate("Features", "F1", "F2")) .Append(mlContext.Transforms.Normalize("Features")) .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numLeaves: 2, numTrees: 1, minDatapointsInLeaves: 2)); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index b3e2502237..a4a211721e 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ 
b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -243,9 +243,9 @@ private void NormalizationWorkout(string dataPath) // Apply all kinds of standard ML.NET normalization to the raw features. var pipeline = mlContext.Transforms.Normalize( - new NormalizingEstimator.MinMaxColumn("MinMaxNormalized", "Features", fixZero: true), - new NormalizingEstimator.MeanVarColumn("MeanVarNormalized", "Features", fixZero: true), - new NormalizingEstimator.BinningColumn("BinNormalized", "Features", numBins: 256)); + new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true), + new NormalizingEstimator.MeanVarColumnOptions("MeanVarNormalized", "Features", fixZero: true), + new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", numBins: 256)); // Let's train our pipeline of normalizers, and then apply it to the same data. var normalizedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index 104cadfce6..eb89c35dd8 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -544,7 +544,7 @@ public void TensorFlowTransformMNISTLRTrainingTest() var testData = reader.Read(GetDataPath(TestDatasets.mnistOneClass.testFilename)); var pipe = mlContext.Transforms.Categorical.OneHotEncoding("OneHotLabel", "Label") - .Append(mlContext.Transforms.Normalize(new NormalizingEstimator.MinMaxColumn("Features", "Placeholder"))) + .Append(mlContext.Transforms.Normalize(new NormalizingEstimator.MinMaxColumnOptions("Features", "Placeholder"))) .Append(mlContext.Transforms.TensorFlow(new TensorFlowEstimator.Options() { ModelLocation = model_location, @@ -1004,7 +1004,7 @@ public void TensorFlowSentimentClassificationTest() // Then this integer vector is retrieved from the pipeline and resized to fixed length. // The second pipeline 'tfEnginePipe' takes the resized integer vector and passes it to TensoFlow and gets the classification scores. 
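For reference (not part of the patch): the renamed per-column normalizer options are used exactly as in the cookbook sample above. A minimal sketch, assuming an MLContext named mlContext and an IDataView trainData with a "Features" vector column.

    // Two normalization schemes over the same source column, using the renamed option types.
    var normalizePipeline = mlContext.Transforms.Normalize(
        new NormalizingEstimator.MinMaxColumnOptions("FeaturesMinMax", "Features", fixZero: true),
        new NormalizingEstimator.BinningColumnOptions("FeaturesBinned", "Features", numBins: 256));
    var normalized = normalizePipeline.Fit(trainData).Transform(trainData);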
var estimator = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text") - .Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new SimpleColumnInfo[] { ("Features", "TokenizedWords") })); + .Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new ColumnOptions[] { ("Features", "TokenizedWords") })); var dataPipe = estimator.Fit(dataView) .CreatePredictionEngine(mlContext); diff --git a/test/Microsoft.ML.Tests/TermEstimatorTests.cs b/test/Microsoft.ML.Tests/TermEstimatorTests.cs index 757b073b07..92dcc9780a 100644 --- a/test/Microsoft.ML.Tests/TermEstimatorTests.cs +++ b/test/Microsoft.ML.Tests/TermEstimatorTests.cs @@ -70,13 +70,13 @@ void TestDifferentTypes() }, new MultiFileSource(dataPath)); var pipe = new ValueToKeyMappingEstimator(ML, new[]{ - new ValueToKeyMappingEstimator.ColumnInfo("TermFloat1", "float1"), - new ValueToKeyMappingEstimator.ColumnInfo("TermFloat4", "float4"), - new ValueToKeyMappingEstimator.ColumnInfo("TermDouble1", "double1"), - new ValueToKeyMappingEstimator.ColumnInfo("TermDouble4", "double4"), - new ValueToKeyMappingEstimator.ColumnInfo("TermInt1", "int1"), - new ValueToKeyMappingEstimator.ColumnInfo("TermText1", "text1"), - new ValueToKeyMappingEstimator.ColumnInfo("TermText2", "text2") + new ValueToKeyMappingEstimator.ColumnOptions("TermFloat1", "float1"), + new ValueToKeyMappingEstimator.ColumnOptions("TermFloat4", "float4"), + new ValueToKeyMappingEstimator.ColumnOptions("TermDouble1", "double1"), + new ValueToKeyMappingEstimator.ColumnOptions("TermDouble4", "double4"), + new ValueToKeyMappingEstimator.ColumnOptions("TermInt1", "int1"), + new ValueToKeyMappingEstimator.ColumnOptions("TermText1", "text1"), + new ValueToKeyMappingEstimator.ColumnOptions("TermText2", "text2") }); var data = loader.Read(dataPath); data = ML.Data.TakeRows(data, 10); @@ -101,9 +101,9 @@ void TestSimpleCase() var stringData = new[] { new TestClassDifferentTypes { A = "1", B = "c", C = "b" } }; var dataView = ML.Data.ReadFromEnumerable(data); var pipe = new ValueToKeyMappingEstimator(Env, new[]{ - new ValueToKeyMappingEstimator.ColumnInfo("TermA", "A"), - new ValueToKeyMappingEstimator.ColumnInfo("TermB", "B"), - new ValueToKeyMappingEstimator.ColumnInfo("TermC", "C") + new ValueToKeyMappingEstimator.ColumnOptions("TermA", "A"), + new ValueToKeyMappingEstimator.ColumnOptions("TermB", "B"), + new ValueToKeyMappingEstimator.ColumnOptions("TermC", "C") }); var invalidData = ML.Data.ReadFromEnumerable(xydata); var validFitNotValidTransformData = ML.Data.ReadFromEnumerable(stringData); @@ -116,9 +116,9 @@ void TestOldSavingAndLoading() var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } }; var dataView = ML.Data.ReadFromEnumerable(data); var est = new ValueToKeyMappingEstimator(Env, new[]{ - new ValueToKeyMappingEstimator.ColumnInfo("TermA", "A"), - new ValueToKeyMappingEstimator.ColumnInfo("TermB", "B"), - new ValueToKeyMappingEstimator.ColumnInfo("TermC", "C") + new ValueToKeyMappingEstimator.ColumnOptions("TermA", "A"), + new ValueToKeyMappingEstimator.ColumnOptions("TermB", "B"), + new ValueToKeyMappingEstimator.ColumnOptions("TermC", "C") }); var transformer = est.Fit(dataView); var result = transformer.Transform(dataView); @@ -138,7 +138,7 @@ void TestMetadataCopy() var data = new[] { new TestMetaClass() { Term = "A", NotUsed = 1 }, new TestMetaClass() { Term = "B" }, new TestMetaClass() { Term = "C" } }; var dataView = ML.Data.ReadFromEnumerable(data); var termEst = 
new ValueToKeyMappingEstimator(Env, new[] { - new ValueToKeyMappingEstimator.ColumnInfo("T", "Term") }); + new ValueToKeyMappingEstimator.ColumnOptions("T", "Term") }); var termTransformer = termEst.Fit(dataView); var result = termTransformer.Transform(dataView); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs index a76e736fca..430dbc7dbd 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs @@ -196,8 +196,8 @@ public void TestEstimatorMultiClassNaiveBayesTrainer() // Pipeline. var pipeline = new ValueToKeyMappingEstimator(Env, new[]{ - new ValueToKeyMappingEstimator.ColumnInfo("Group", "Workclass"), - new ValueToKeyMappingEstimator.ColumnInfo("Label0", "Label") }); + new ValueToKeyMappingEstimator.ColumnOptions("Group", "Workclass"), + new ValueToKeyMappingEstimator.ColumnOptions("Label0", "Label") }); return (pipeline, data); } diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs index 8295cdd789..be406cefae 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs @@ -52,10 +52,10 @@ public void CategoricalHashWorkout() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Categorical.OneHotHashEncoding(new[]{ - new OneHotHashEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag), - new OneHotHashEncodingEstimator.ColumnInfo("CatB", "A", OneHotEncodingTransformer.OutputKind.Bin), - new OneHotHashEncodingEstimator.ColumnInfo("CatC", "A", OneHotEncodingTransformer.OutputKind.Ind), - new OneHotHashEncodingEstimator.ColumnInfo("CatD", "A", OneHotEncodingTransformer.OutputKind.Key), + new OneHotHashEncodingEstimator.ColumnOptions("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag), + new OneHotHashEncodingEstimator.ColumnOptions("CatB", "A", OneHotEncodingTransformer.OutputKind.Bin), + new OneHotHashEncodingEstimator.ColumnOptions("CatC", "A", OneHotEncodingTransformer.OutputKind.Ind), + new OneHotHashEncodingEstimator.ColumnOptions("CatD", "A", OneHotEncodingTransformer.OutputKind.Key), }); TestEstimatorCore(pipe, dataView); @@ -110,16 +110,16 @@ public void TestMetadataPropagation() var dataView = ML.Data.ReadFromEnumerable(data); var bagPipe = ML.Transforms.Categorical.OneHotHashEncoding( - new OneHotHashEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatB", "B", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatC", "C", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatD", "D", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatE", "E", OneHotEncodingTransformer.OutputKind.Ind, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatF", "F", OneHotEncodingTransformer.OutputKind.Ind, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatG", "A", OneHotEncodingTransformer.OutputKind.Key, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatH", "B", OneHotEncodingTransformer.OutputKind.Key, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatI", "A", 
OneHotEncodingTransformer.OutputKind.Bin, invertHash: -1), - new OneHotHashEncodingEstimator.ColumnInfo("CatJ", "B", OneHotEncodingTransformer.OutputKind.Bin, invertHash: -1)); + new OneHotHashEncodingEstimator.ColumnOptions("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatB", "B", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatC", "C", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatD", "D", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatE", "E", OneHotEncodingTransformer.OutputKind.Ind, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatF", "F", OneHotEncodingTransformer.OutputKind.Ind, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatG", "A", OneHotEncodingTransformer.OutputKind.Key, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatH", "B", OneHotEncodingTransformer.OutputKind.Key, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatI", "A", OneHotEncodingTransformer.OutputKind.Bin, invertHash: -1), + new OneHotHashEncodingEstimator.ColumnOptions("CatJ", "B", OneHotEncodingTransformer.OutputKind.Bin, invertHash: -1)); var bagResult = bagPipe.Fit(dataView).Transform(dataView); ValidateMetadata(bagResult); @@ -213,9 +213,9 @@ public void TestOldSavingAndLoading() var data = new[] { new TestClass() { A = "1", B = "2", C = "3", }, new TestClass() { A = "4", B = "5", C = "6" } }; var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Categorical.OneHotHashEncoding(new[]{ - new OneHotHashEncodingEstimator.ColumnInfo("CatHashA", "A"), - new OneHotHashEncodingEstimator.ColumnInfo("CatHashB", "B"), - new OneHotHashEncodingEstimator.ColumnInfo("CatHashC", "C") + new OneHotHashEncodingEstimator.ColumnOptions("CatHashA", "A"), + new OneHotHashEncodingEstimator.ColumnOptions("CatHashB", "B"), + new OneHotHashEncodingEstimator.ColumnOptions("CatHashC", "C") }); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs index ddb5c1c504..d1ff00303d 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs @@ -60,10 +60,10 @@ public void CategoricalWorkout() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Categorical.OneHotEncoding(new[]{ - new OneHotEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag), - new OneHotEncodingEstimator.ColumnInfo("CatB", "A", OneHotEncodingTransformer.OutputKind.Bin), - new OneHotEncodingEstimator.ColumnInfo("CatC", "A", OneHotEncodingTransformer.OutputKind.Ind), - new OneHotEncodingEstimator.ColumnInfo("CatD", "A", OneHotEncodingTransformer.OutputKind.Key), + new OneHotEncodingEstimator.ColumnOptions("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag), + new OneHotEncodingEstimator.ColumnOptions("CatB", "A", OneHotEncodingTransformer.OutputKind.Bin), + new OneHotEncodingEstimator.ColumnOptions("CatC", "A", OneHotEncodingTransformer.OutputKind.Ind), + new OneHotEncodingEstimator.ColumnOptions("CatD", "A", OneHotEncodingTransformer.OutputKind.Key), }); TestEstimatorCore(pipe, dataView); @@ -121,7 +121,7 @@ public void 
CategoricalOneHotEncodingFromSideData() sideDataBuilder.AddColumn("Hello", "hello", "my", "friend"); var sideData = sideDataBuilder.GetDataView(); - var ci = new OneHotEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag); + var ci = new OneHotEncodingEstimator.ColumnOptions("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag); var pipe = mlContext.Transforms.Categorical.OneHotEncoding(new[] { ci }, sideData); var output = pipe.Fit(dataView).Transform(dataView); @@ -180,18 +180,18 @@ public void TestMetadataPropagation() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Categorical.OneHotEncoding(new[] { - new OneHotEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag), - new OneHotEncodingEstimator.ColumnInfo("CatB", "B", OneHotEncodingTransformer.OutputKind.Bag), - new OneHotEncodingEstimator.ColumnInfo("CatC", "C", OneHotEncodingTransformer.OutputKind.Bag), - new OneHotEncodingEstimator.ColumnInfo("CatD", "D", OneHotEncodingTransformer.OutputKind.Bag), - new OneHotEncodingEstimator.ColumnInfo("CatE", "E",OneHotEncodingTransformer.OutputKind.Ind), - new OneHotEncodingEstimator.ColumnInfo("CatF", "F", OneHotEncodingTransformer.OutputKind.Ind), - new OneHotEncodingEstimator.ColumnInfo("CatG", "G", OneHotEncodingTransformer.OutputKind.Key), - new OneHotEncodingEstimator.ColumnInfo("CatH", "H", OneHotEncodingTransformer.OutputKind.Key), - new OneHotEncodingEstimator.ColumnInfo("CatI", "A", OneHotEncodingTransformer.OutputKind.Bin), - new OneHotEncodingEstimator.ColumnInfo("CatJ", "B", OneHotEncodingTransformer.OutputKind.Bin), - new OneHotEncodingEstimator.ColumnInfo("CatK", "C", OneHotEncodingTransformer.OutputKind.Bin), - new OneHotEncodingEstimator.ColumnInfo("CatL", "D", OneHotEncodingTransformer.OutputKind.Bin) }); + new OneHotEncodingEstimator.ColumnOptions("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag), + new OneHotEncodingEstimator.ColumnOptions("CatB", "B", OneHotEncodingTransformer.OutputKind.Bag), + new OneHotEncodingEstimator.ColumnOptions("CatC", "C", OneHotEncodingTransformer.OutputKind.Bag), + new OneHotEncodingEstimator.ColumnOptions("CatD", "D", OneHotEncodingTransformer.OutputKind.Bag), + new OneHotEncodingEstimator.ColumnOptions("CatE", "E",OneHotEncodingTransformer.OutputKind.Ind), + new OneHotEncodingEstimator.ColumnOptions("CatF", "F", OneHotEncodingTransformer.OutputKind.Ind), + new OneHotEncodingEstimator.ColumnOptions("CatG", "G", OneHotEncodingTransformer.OutputKind.Key), + new OneHotEncodingEstimator.ColumnOptions("CatH", "H", OneHotEncodingTransformer.OutputKind.Key), + new OneHotEncodingEstimator.ColumnOptions("CatI", "A", OneHotEncodingTransformer.OutputKind.Bin), + new OneHotEncodingEstimator.ColumnOptions("CatJ", "B", OneHotEncodingTransformer.OutputKind.Bin), + new OneHotEncodingEstimator.ColumnOptions("CatK", "C", OneHotEncodingTransformer.OutputKind.Bin), + new OneHotEncodingEstimator.ColumnOptions("CatL", "D", OneHotEncodingTransformer.OutputKind.Bin) }); var result = pipe.Fit(dataView).Transform(dataView); @@ -303,9 +303,9 @@ public void TestOldSavingAndLoading() var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } }; var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Categorical.OneHotEncoding(new[]{ - new OneHotEncodingEstimator.ColumnInfo("TermA", "A"), - new OneHotEncodingEstimator.ColumnInfo("TermB", "B"), - new OneHotEncodingEstimator.ColumnInfo("TermC", "C") + new 
OneHotEncodingEstimator.ColumnOptions("TermA", "A"), + new OneHotEncodingEstimator.ColumnOptions("TermB", "B"), + new OneHotEncodingEstimator.ColumnOptions("TermC", "C") }); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); diff --git a/test/Microsoft.ML.Tests/Transformers/ConcatTests.cs b/test/Microsoft.ML.Tests/Transformers/ConcatTests.cs index ceaefbc8d1..f1e84a1c70 100644 --- a/test/Microsoft.ML.Tests/Transformers/ConcatTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ConcatTests.cs @@ -103,8 +103,8 @@ DataViewType GetType(DataViewSchema schema, string name) data = ML.Data.TakeRows(data, 10); var concater = new ColumnConcatenatingTransformer(ML, - new ColumnConcatenatingTransformer.ColumnInfo("f2", new[] { ("float1", "FLOAT1"), ("float1", "FLOAT2") }), - new ColumnConcatenatingTransformer.ColumnInfo("f3", new[] { ("float4", "FLOAT4"), ("float1", "FLOAT1") })); + new ColumnConcatenatingTransformer.ColumnOptions("f2", new[] { ("float1", "FLOAT1"), ("float1", "FLOAT2") }), + new ColumnConcatenatingTransformer.ColumnOptions("f3", new[] { ("float4", "FLOAT4"), ("float1", "FLOAT1") })); data = concater.Transform(data); // Test Columns property. diff --git a/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs b/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs index 32b07a97c5..66c31fc808 100644 --- a/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs @@ -75,8 +75,8 @@ public void TestConvertWorkout() var data = new[] { new TestClass() { A = 1, B = new int[2] { 1,4 } }, new TestClass() { A = 2, B = new int[2] { 3,4 } }}; var dataView = ML.Data.ReadFromEnumerable(data); - var pipe = ML.Transforms.Conversion.ConvertType(columns: new[] {new TypeConvertingEstimator.ColumnInfo("ConvA", DataKind.Single, "A"), - new TypeConvertingEstimator.ColumnInfo("ConvB", DataKind.Single, "B")}); + var pipe = ML.Transforms.Conversion.ConvertType(columns: new[] {new TypeConvertingEstimator.ColumnOptions("ConvA", DataKind.Single, "A"), + new TypeConvertingEstimator.ColumnOptions("ConvB", DataKind.Single, "B")}); TestEstimatorCore(pipe, dataView); var allTypesData = new[] @@ -115,18 +115,18 @@ public void TestConvertWorkout() var allTypesDataView = ML.Data.ReadFromEnumerable(allTypesData); var allTypesPipe = ML.Transforms.Conversion.ConvertType(columns: new[] { - new TypeConvertingEstimator.ColumnInfo("ConvA", DataKind.Single, "AA"), - new TypeConvertingEstimator.ColumnInfo("ConvB", DataKind.Single, "AB"), - new TypeConvertingEstimator.ColumnInfo("ConvC", DataKind.Single, "AC"), - new TypeConvertingEstimator.ColumnInfo("ConvD", DataKind.Single, "AD"), - new TypeConvertingEstimator.ColumnInfo("ConvE", DataKind.Single, "AE"), - new TypeConvertingEstimator.ColumnInfo("ConvF", DataKind.Single, "AF"), - new TypeConvertingEstimator.ColumnInfo("ConvG", DataKind.Single, "AG"), - new TypeConvertingEstimator.ColumnInfo("ConvH", DataKind.Single, "AH"), - new TypeConvertingEstimator.ColumnInfo("ConvK", DataKind.Single, "AK"), - new TypeConvertingEstimator.ColumnInfo("ConvL", DataKind.Single, "AL"), - new TypeConvertingEstimator.ColumnInfo("ConvM", DataKind.Single, "AM"), - new TypeConvertingEstimator.ColumnInfo("ConvN", DataKind.Single, "AN")} + new TypeConvertingEstimator.ColumnOptions("ConvA", DataKind.Single, "AA"), + new TypeConvertingEstimator.ColumnOptions("ConvB", DataKind.Single, "AB"), + new TypeConvertingEstimator.ColumnOptions("ConvC", DataKind.Single, "AC"), + new 
TypeConvertingEstimator.ColumnOptions("ConvD", DataKind.Single, "AD"), + new TypeConvertingEstimator.ColumnOptions("ConvE", DataKind.Single, "AE"), + new TypeConvertingEstimator.ColumnOptions("ConvF", DataKind.Single, "AF"), + new TypeConvertingEstimator.ColumnOptions("ConvG", DataKind.Single, "AG"), + new TypeConvertingEstimator.ColumnOptions("ConvH", DataKind.Single, "AH"), + new TypeConvertingEstimator.ColumnOptions("ConvK", DataKind.Single, "AK"), + new TypeConvertingEstimator.ColumnOptions("ConvL", DataKind.Single, "AL"), + new TypeConvertingEstimator.ColumnOptions("ConvM", DataKind.Single, "AM"), + new TypeConvertingEstimator.ColumnOptions("ConvN", DataKind.Single, "AN")} ); TestEstimatorCore(allTypesPipe, allTypesDataView); @@ -156,7 +156,7 @@ public void ValueToKeyFromSideData() var sideData = sideDataBuilder.GetDataView(); // For some reason the column info is on the *transformer*, not the estimator. Already tracked as issue #1760. - var ci = new ValueToKeyMappingEstimator.ColumnInfo("CatA", "A"); + var ci = new ValueToKeyMappingEstimator.ColumnOptions("CatA", "A"); var pipe = mlContext.Transforms.Conversion.MapValueToKey(new[] { ci }, sideData); var output = pipe.Fit(dataView).Transform(dataView); @@ -185,8 +185,8 @@ public void TestOldSavingAndLoading() var data = new[] { new TestClass() { A = 1, B = new int[2] { 1,4 } }, new TestClass() { A = 2, B = new int[2] { 3,4 } }}; var dataView = ML.Data.ReadFromEnumerable(data); - var pipe = ML.Transforms.Conversion.ConvertType(columns: new[] {new TypeConvertingEstimator.ColumnInfo("ConvA", typeof(double), "A"), - new TypeConvertingEstimator.ColumnInfo("ConvB", typeof(double), "B")}); + var pipe = ML.Transforms.Conversion.ConvertType(columns: new[] {new TypeConvertingEstimator.ColumnOptions("ConvA", typeof(double), "A"), + new TypeConvertingEstimator.ColumnOptions("ConvB", typeof(double), "B")}); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); @@ -204,11 +204,11 @@ public void TestMetadata() var data = new[] { new MetaClass() { A = 1, B = "A" }, new MetaClass() { A = 2, B = "B" }}; var pipe = ML.Transforms.Categorical.OneHotEncoding(new[] { - new OneHotEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Ind), - new OneHotEncodingEstimator.ColumnInfo("CatB", "B", OneHotEncodingTransformer.OutputKind.Key) + new OneHotEncodingEstimator.ColumnOptions("CatA", "A", OneHotEncodingTransformer.OutputKind.Ind), + new OneHotEncodingEstimator.ColumnOptions("CatB", "B", OneHotEncodingTransformer.OutputKind.Key) }).Append(ML.Transforms.Conversion.ConvertType(new[] { - new TypeConvertingEstimator.ColumnInfo("ConvA", DataKind.Double, "CatA"), - new TypeConvertingEstimator.ColumnInfo("ConvB", DataKind.UInt16, "CatB") + new TypeConvertingEstimator.ColumnOptions("ConvA", DataKind.Double, "CatA"), + new TypeConvertingEstimator.ColumnOptions("ConvB", DataKind.UInt16, "CatB") })); var dataView = ML.Data.ReadFromEnumerable(data); dataView = pipe.Fit(dataView).Transform(dataView); @@ -242,7 +242,7 @@ public class SimpleSchemaUIntColumn public void TypeConvertKeyBackCompatTest() { // Model generated using the following command before the change removing Min and Count from KeyType. 
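A usage sketch, not part of the patch: TypeConvertingEstimator.ColumnOptions as exercised in the ConvertTests hunks above, via the public ConvertType catalog call. Column names and the mlContext variable are placeholders.

    // Convert two columns to single-precision floats under new output column names.
    var convertPipeline = mlContext.Transforms.Conversion.ConvertType(columns: new[]
    {
        new TypeConvertingEstimator.ColumnOptions("AgeSingle", DataKind.Single, "Age"),
        new TypeConvertingEstimator.ColumnOptions("CountSingle", DataKind.Single, "Count")
    });

The same pattern applies to ValueToKeyMappingEstimator.ColumnOptions passed to mlContext.Transforms.Conversion.MapValueToKey, as in the side-data test above.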
- // ML.Transforms.Conversion.ConvertType(new[] { new TypeConvertingEstimator.ColumnInfo("key", "convertedKey", + // ML.Transforms.Conversion.ConvertType(new[] { new TypeConvertingEstimator.ColumnOptions("key", "convertedKey", // DataKind.UInt64, new KeyCount(4)) }).Fit(dataView); var dataArray = new[] { @@ -265,7 +265,7 @@ public void TypeConvertKeyBackCompatTest() } var outDataOld = modelOld.Transform(dataView); - var modelNew = ML.Transforms.Conversion.ConvertType(new[] { new TypeConvertingEstimator.ColumnInfo("convertedKey", + var modelNew = ML.Transforms.Conversion.ConvertType(new[] { new TypeConvertingEstimator.ColumnOptions("convertedKey", DataKind.UInt64, "key", new KeyCount(4)) }).Fit(dataView); var outDataNew = modelNew.Transform(dataView); diff --git a/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs b/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs index 40eaa5f745..dc840698fb 100644 --- a/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs @@ -74,12 +74,12 @@ public void DropSlotsTransform() var columns = new[] { - new SlotsDroppingTransformer.ColumnInfo("dropped1", "VectorFloat", (min: 0, max: 1)), - new SlotsDroppingTransformer.ColumnInfo("dropped2", "VectorFloat"), - new SlotsDroppingTransformer.ColumnInfo("dropped3", "ScalarFloat", (min:0, max: 3)), - new SlotsDroppingTransformer.ColumnInfo("dropped4", "VectorFloat", (min: 1, max: 2)), - new SlotsDroppingTransformer.ColumnInfo("dropped5", "VectorDouble", (min: 1, null)), - new SlotsDroppingTransformer.ColumnInfo("dropped6", "VectorFloat", (min: 100, null)) + new SlotsDroppingTransformer.ColumnOptions("dropped1", "VectorFloat", (min: 0, max: 1)), + new SlotsDroppingTransformer.ColumnOptions("dropped2", "VectorFloat"), + new SlotsDroppingTransformer.ColumnOptions("dropped3", "ScalarFloat", (min:0, max: 3)), + new SlotsDroppingTransformer.ColumnOptions("dropped4", "VectorFloat", (min: 1, max: 2)), + new SlotsDroppingTransformer.ColumnOptions("dropped5", "VectorDouble", (min: 1, null)), + new SlotsDroppingTransformer.ColumnOptions("dropped6", "VectorFloat", (min: 100, null)) }; var trans = new SlotsDroppingTransformer(ML, columns); @@ -115,11 +115,11 @@ public void CountFeatureSelectionWorkout() var data = ML.Data.Cache(reader.Read(new MultiFileSource(dataPath)).AsDynamic); var columns = new[] { - new CountFeatureSelectingEstimator.ColumnInfo("FeatureSelectDouble", "VectorDouble", minCount: 1), - new CountFeatureSelectingEstimator.ColumnInfo("ScalFeatureSelectMissing690", "ScalarFloat", minCount: 690), - new CountFeatureSelectingEstimator.ColumnInfo("ScalFeatureSelectMissing100", "ScalarFloat", minCount: 100), - new CountFeatureSelectingEstimator.ColumnInfo("VecFeatureSelectMissing690", "VectorDouble", minCount: 690), - new CountFeatureSelectingEstimator.ColumnInfo("VecFeatureSelectMissing100", "VectorDouble", minCount: 100) + new CountFeatureSelectingEstimator.ColumnOptions("FeatureSelectDouble", "VectorDouble", minCount: 1), + new CountFeatureSelectingEstimator.ColumnOptions("ScalFeatureSelectMissing690", "ScalarFloat", minCount: 690), + new CountFeatureSelectingEstimator.ColumnOptions("ScalFeatureSelectMissing100", "ScalarFloat", minCount: 100), + new CountFeatureSelectingEstimator.ColumnOptions("VecFeatureSelectMissing690", "VectorDouble", minCount: 690), + new CountFeatureSelectingEstimator.ColumnOptions("VecFeatureSelectMissing100", "VectorDouble", minCount: 100) }; var est = 
ML.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("FeatureSelect", "VectorFloat", count: 1) .Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(columns)); @@ -184,7 +184,7 @@ public void MutualInformationSelectionWorkout() var est = ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation("FeatureSelect", "VectorFloat", slotsInOutput: 1, labelColumn: "Label") .Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(labelColumn: "Label", slotsInOutput: 2, numBins: 100, - columns: new SimpleColumnInfo[] { + columns: new ColumnOptions[] { ("out1", "VectorFloat"), ("out2", "VectorDouble") })); diff --git a/test/Microsoft.ML.Tests/Transformers/HashTests.cs b/test/Microsoft.ML.Tests/Transformers/HashTests.cs index ee6a2b15cb..73fcce6cbe 100644 --- a/test/Microsoft.ML.Tests/Transformers/HashTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/HashTests.cs @@ -47,10 +47,10 @@ public void HashWorkout() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Conversion.Hash(new[]{ - new HashingEstimator.ColumnInfo("HashA", "A", hashBits:4, invertHash:-1), - new HashingEstimator.ColumnInfo("HashB", "B", hashBits:3, ordered:true), - new HashingEstimator.ColumnInfo("HashC", "C", seed:42), - new HashingEstimator.ColumnInfo("HashD", "A"), + new HashingEstimator.ColumnOptions("HashA", "A", hashBits:4, invertHash:-1), + new HashingEstimator.ColumnOptions("HashB", "B", hashBits:3, ordered:true), + new HashingEstimator.ColumnOptions("HashC", "C", seed:42), + new HashingEstimator.ColumnOptions("HashD", "A"), }); TestEstimatorCore(pipe, dataView); @@ -69,9 +69,9 @@ public void TestMetadata() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Conversion.Hash(new[] { - new HashingEstimator.ColumnInfo("HashA", "A", invertHash:1, hashBits:10), - new HashingEstimator.ColumnInfo("HashAUnlim", "A", invertHash:-1, hashBits:10), - new HashingEstimator.ColumnInfo("HashAUnlimOrdered", "A", invertHash:-1, hashBits:10, ordered:true) + new HashingEstimator.ColumnOptions("HashA", "A", invertHash:1, hashBits:10), + new HashingEstimator.ColumnOptions("HashAUnlim", "A", invertHash:-1, hashBits:10), + new HashingEstimator.ColumnOptions("HashAUnlimOrdered", "A", invertHash:-1, hashBits:10, ordered:true) }); var result = pipe.Fit(dataView).Transform(dataView); ValidateMetadata(result); @@ -109,10 +109,10 @@ public void TestOldSavingAndLoading() var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } }; var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Conversion.Hash(new[]{ - new HashingEstimator.ColumnInfo("HashA", "A", hashBits:4, invertHash:-1), - new HashingEstimator.ColumnInfo("HashB", "B", hashBits:3, ordered:true), - new HashingEstimator.ColumnInfo("HashC", "C", seed:42), - new HashingEstimator.ColumnInfo("HashD" ,"A"), + new HashingEstimator.ColumnOptions("HashA", "A", hashBits:4, invertHash:-1), + new HashingEstimator.ColumnOptions("HashB", "B", hashBits:3, ordered:true), + new HashingEstimator.ColumnOptions("HashC", "C", seed:42), + new HashingEstimator.ColumnOptions("HashD" ,"A"), }); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); @@ -133,7 +133,7 @@ private void HashTestCore(T val, PrimitiveDataViewType type, uint expected, u var inRow = AnnotationUtils.AnnotationsAsRow(builder.ToAnnotations()); // First do an unordered hash. 
- var info = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits); + var info = new HashingEstimator.ColumnOptions("Bar", "Foo", hashBits: bits); var xf = new HashingTransformer(Env, new[] { info }); var mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema); mapper.OutputSchema.TryGetColumnIndex("Bar", out int outCol); @@ -145,7 +145,7 @@ private void HashTestCore(T val, PrimitiveDataViewType type, uint expected, u Assert.Equal(expected, result); // Next do an ordered hash. - info = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true); + info = new HashingEstimator.ColumnOptions("Bar", "Foo", hashBits: bits, ordered: true); xf = new HashingTransformer(Env, new[] { info }); mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema); mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol); @@ -163,7 +163,7 @@ private void HashTestCore(T val, PrimitiveDataViewType type, uint expected, u builder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer dst) => denseVec.CopyTo(ref dst)); inRow = AnnotationUtils.AnnotationsAsRow(builder.ToAnnotations()); - info = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false); + info = new HashingEstimator.ColumnOptions("Bar", "Foo", hashBits: bits, ordered: false); xf = new HashingTransformer(Env, new[] { info }); mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema); mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol); @@ -178,7 +178,7 @@ private void HashTestCore(T val, PrimitiveDataViewType type, uint expected, u Assert.All(vecResult.DenseValues(), v => Assert.Equal(expected, v)); // Now do ordered with the dense vector. - info = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true); + info = new HashingEstimator.ColumnOptions("Bar", "Foo", hashBits: bits, ordered: true); xf = new HashingTransformer(Env, new[] { info }); mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema); mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol); @@ -197,7 +197,7 @@ private void HashTestCore(T val, PrimitiveDataViewType type, uint expected, u builder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer dst) => sparseVec.CopyTo(ref dst)); inRow = AnnotationUtils.AnnotationsAsRow(builder.ToAnnotations()); - info = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false); + info = new HashingEstimator.ColumnOptions("Bar", "Foo", hashBits: bits, ordered: false); xf = new HashingTransformer(Env, new[] { info }); mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema); mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol); @@ -210,7 +210,7 @@ private void HashTestCore(T val, PrimitiveDataViewType type, uint expected, u Assert.Equal(expected, vecResult.GetItemOrDefault(3)); Assert.Equal(expected, vecResult.GetItemOrDefault(7)); - info = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true); + info = new HashingEstimator.ColumnOptions("Bar", "Foo", hashBits: bits, ordered: true); xf = new HashingTransformer(Env, new[] { info }); mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema); mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol); diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs index c7a5468307..126de633fc 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs @@ -47,9 +47,9 @@ public void 
KeyToBinaryVectorWorkout() var dataView = ML.Data.ReadFromEnumerable(data); dataView = new ValueToKeyMappingEstimator(Env, new[]{ - new ValueToKeyMappingEstimator.ColumnInfo("TermA", "A"), - new ValueToKeyMappingEstimator.ColumnInfo("TermB", "B"), - new ValueToKeyMappingEstimator.ColumnInfo("TermC", "C", textKeyValues:true) + new ValueToKeyMappingEstimator.ColumnOptions("TermA", "A"), + new ValueToKeyMappingEstimator.ColumnOptions("TermB", "B"), + new ValueToKeyMappingEstimator.ColumnOptions("TermC", "C", textKeyValues:true) }).Fit(dataView).Transform(dataView); var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(("CatA", "TermA"), ("CatC", "TermC")); @@ -70,8 +70,8 @@ public void KeyToBinaryVectorStatic() // Non-pigsty Term. var dynamicData = new ValueToKeyMappingEstimator(Env, new[] { - new ValueToKeyMappingEstimator.ColumnInfo("A", "ScalarString"), - new ValueToKeyMappingEstimator.ColumnInfo("B", "VectorString") }) + new ValueToKeyMappingEstimator.ColumnOptions("A", "ScalarString"), + new ValueToKeyMappingEstimator.ColumnOptions("B", "VectorString") }) .Fit(data.AsDynamic).Transform(data.AsDynamic); var data2 = dynamicData.AssertStatic(Env, ctx => ( @@ -99,10 +99,10 @@ public void TestMetadataPropagation() var dataView = ML.Data.ReadFromEnumerable(data); var termEst = new ValueToKeyMappingEstimator(Env, new[] { - new ValueToKeyMappingEstimator.ColumnInfo("TA", "A", textKeyValues: true), - new ValueToKeyMappingEstimator.ColumnInfo("TB", "B", textKeyValues: true), - new ValueToKeyMappingEstimator.ColumnInfo("TC", "C"), - new ValueToKeyMappingEstimator.ColumnInfo("TD", "D") }); + new ValueToKeyMappingEstimator.ColumnOptions("TA", "A", textKeyValues: true), + new ValueToKeyMappingEstimator.ColumnOptions("TB", "B", textKeyValues: true), + new ValueToKeyMappingEstimator.ColumnOptions("TC", "C"), + new ValueToKeyMappingEstimator.ColumnOptions("TD", "D") }); var termTransformer = termEst.Fit(dataView); dataView = termTransformer.Transform(dataView); @@ -150,9 +150,9 @@ public void TestOldSavingAndLoading() var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } }; var dataView = ML.Data.ReadFromEnumerable(data); var est = new ValueToKeyMappingEstimator(Env, new[]{ - new ValueToKeyMappingEstimator.ColumnInfo("TermA", "A"), - new ValueToKeyMappingEstimator.ColumnInfo("TermB", "B", textKeyValues:true), - new ValueToKeyMappingEstimator.ColumnInfo("TermC", "C") + new ValueToKeyMappingEstimator.ColumnOptions("TermA", "A"), + new ValueToKeyMappingEstimator.ColumnOptions("TermB", "B", textKeyValues:true), + new ValueToKeyMappingEstimator.ColumnOptions("TermC", "C") }); var transformer = est.Fit(dataView); dataView = transformer.Transform(dataView); diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToValueTests.cs b/test/Microsoft.ML.Tests/Transformers/KeyToValueTests.cs index c04f363d07..4ea5be7b96 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToValueTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToValueTests.cs @@ -39,8 +39,8 @@ public void KeyToValueWorkout() var data = reader.Read(dataPath); data = new ValueToKeyMappingEstimator(Env, new[] { - new ValueToKeyMappingEstimator.ColumnInfo("A", "ScalarString"), - new ValueToKeyMappingEstimator.ColumnInfo("B", "VectorString") }).Fit(data).Transform(data); + new ValueToKeyMappingEstimator.ColumnOptions("A", "ScalarString"), + new ValueToKeyMappingEstimator.ColumnOptions("B", "VectorString") }).Fit(data).Transform(data); var badData1 = new ColumnCopyingTransformer(Env, ("A", 
"BareKey")).Transform(data); var badData2 = new ColumnCopyingTransformer(Env, ("B", "VectorString")).Transform(data); @@ -76,8 +76,8 @@ public void KeyToValuePigsty() // Non-pigsty Term. var dynamicData = new ValueToKeyMappingEstimator(Env, new[] { - new ValueToKeyMappingEstimator.ColumnInfo("A", "ScalarString"), - new ValueToKeyMappingEstimator.ColumnInfo("B", "VectorString") }) + new ValueToKeyMappingEstimator.ColumnOptions("A", "ScalarString"), + new ValueToKeyMappingEstimator.ColumnOptions("B", "VectorString") }) .Fit(data.AsDynamic).Transform(data.AsDynamic); var data2 = dynamicData.AssertStatic(Env, ctx => ( diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs b/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs index 8787fa2b31..3ca6054fb6 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs @@ -53,15 +53,15 @@ public void KeyToVectorWorkout() var dataView = ML.Data.ReadFromEnumerable(data); dataView = new ValueToKeyMappingEstimator(Env, new[]{ - new ValueToKeyMappingEstimator.ColumnInfo("TermA", "A"), - new ValueToKeyMappingEstimator.ColumnInfo("TermB", "B"), - new ValueToKeyMappingEstimator.ColumnInfo("TermC", "C", textKeyValues:true) + new ValueToKeyMappingEstimator.ColumnOptions("TermA", "A"), + new ValueToKeyMappingEstimator.ColumnOptions("TermB", "B"), + new ValueToKeyMappingEstimator.ColumnOptions("TermC", "C", textKeyValues:true) }).Fit(dataView).Transform(dataView); - var pipe = ML.Transforms.Conversion.MapKeyToVector(new KeyToVectorMappingEstimator.ColumnInfo("CatA", "TermA", false), - new KeyToVectorMappingEstimator.ColumnInfo("CatB", "TermB", true), - new KeyToVectorMappingEstimator.ColumnInfo("CatC", "TermC", true), - new KeyToVectorMappingEstimator.ColumnInfo("CatCNonBag", "TermC", false)); + var pipe = ML.Transforms.Conversion.MapKeyToVector(new KeyToVectorMappingEstimator.ColumnOptions("CatA", "TermA", false), + new KeyToVectorMappingEstimator.ColumnOptions("CatB", "TermB", true), + new KeyToVectorMappingEstimator.ColumnOptions("CatC", "TermC", true), + new KeyToVectorMappingEstimator.ColumnOptions("CatCNonBag", "TermC", false)); TestEstimatorCore(pipe, dataView); Done(); } @@ -79,8 +79,8 @@ public void KeyToVectorStatic() // Non-pigsty Term. 
var dynamicData = new ValueToKeyMappingEstimator(Env, new[] { - new ValueToKeyMappingEstimator.ColumnInfo("A", "ScalarString"), - new ValueToKeyMappingEstimator.ColumnInfo("B", "VectorString") }) + new ValueToKeyMappingEstimator.ColumnOptions("A", "ScalarString"), + new ValueToKeyMappingEstimator.ColumnOptions("B", "VectorString") }) .Fit(data.AsDynamic).Transform(data.AsDynamic); var data2 = dynamicData.AssertStatic(Env, ctx => ( @@ -110,26 +110,26 @@ public void TestMetadataPropagation() var dataView = ML.Data.ReadFromEnumerable(data); var termEst = new ValueToKeyMappingEstimator(Env, new[] { - new ValueToKeyMappingEstimator.ColumnInfo("TA", "A", textKeyValues: true), - new ValueToKeyMappingEstimator.ColumnInfo("TB", "B"), - new ValueToKeyMappingEstimator.ColumnInfo("TC", "C", textKeyValues: true), - new ValueToKeyMappingEstimator.ColumnInfo("TD", "D", textKeyValues: true), - new ValueToKeyMappingEstimator.ColumnInfo("TE", "E"), - new ValueToKeyMappingEstimator.ColumnInfo("TF", "F"), - new ValueToKeyMappingEstimator.ColumnInfo("TG", "G"), - new ValueToKeyMappingEstimator.ColumnInfo("TH", "H", textKeyValues: true) }); + new ValueToKeyMappingEstimator.ColumnOptions("TA", "A", textKeyValues: true), + new ValueToKeyMappingEstimator.ColumnOptions("TB", "B"), + new ValueToKeyMappingEstimator.ColumnOptions("TC", "C", textKeyValues: true), + new ValueToKeyMappingEstimator.ColumnOptions("TD", "D", textKeyValues: true), + new ValueToKeyMappingEstimator.ColumnOptions("TE", "E"), + new ValueToKeyMappingEstimator.ColumnOptions("TF", "F"), + new ValueToKeyMappingEstimator.ColumnOptions("TG", "G"), + new ValueToKeyMappingEstimator.ColumnOptions("TH", "H", textKeyValues: true) }); var termTransformer = termEst.Fit(dataView); dataView = termTransformer.Transform(dataView); var pipe = ML.Transforms.Conversion.MapKeyToVector( - new KeyToVectorMappingEstimator.ColumnInfo("CatA", "TA", true), - new KeyToVectorMappingEstimator.ColumnInfo("CatB", "TB", false), - new KeyToVectorMappingEstimator.ColumnInfo("CatC", "TC", false), - new KeyToVectorMappingEstimator.ColumnInfo("CatD", "TD", true), - new KeyToVectorMappingEstimator.ColumnInfo("CatE", "TE", false), - new KeyToVectorMappingEstimator.ColumnInfo("CatF", "TF", true), - new KeyToVectorMappingEstimator.ColumnInfo("CatG", "TG", true), - new KeyToVectorMappingEstimator.ColumnInfo("CatH", "TH", false) + new KeyToVectorMappingEstimator.ColumnOptions("CatA", "TA", true), + new KeyToVectorMappingEstimator.ColumnOptions("CatB", "TB", false), + new KeyToVectorMappingEstimator.ColumnOptions("CatC", "TC", false), + new KeyToVectorMappingEstimator.ColumnOptions("CatD", "TD", true), + new KeyToVectorMappingEstimator.ColumnOptions("CatE", "TE", false), + new KeyToVectorMappingEstimator.ColumnOptions("CatF", "TF", true), + new KeyToVectorMappingEstimator.ColumnOptions("CatG", "TG", true), + new KeyToVectorMappingEstimator.ColumnOptions("CatH", "TH", false) ); var result = pipe.Fit(dataView).Transform(dataView); @@ -215,15 +215,15 @@ public void TestOldSavingAndLoading() var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } }; var dataView = ML.Data.ReadFromEnumerable(data); var est = new ValueToKeyMappingEstimator(Env, new[]{ - new ValueToKeyMappingEstimator.ColumnInfo("TermA", "A"), - new ValueToKeyMappingEstimator.ColumnInfo("TermB", "B"), - new ValueToKeyMappingEstimator.ColumnInfo("TermC", "C") + new ValueToKeyMappingEstimator.ColumnOptions("TermA", "A"), + new ValueToKeyMappingEstimator.ColumnOptions("TermB", "B"), + new 
ValueToKeyMappingEstimator.ColumnOptions("TermC", "C") }); var transformer = est.Fit(dataView); dataView = transformer.Transform(dataView); var pipe = ML.Transforms.Conversion.MapKeyToVector( - new KeyToVectorMappingEstimator.ColumnInfo("CatA", "TermA",false), - new KeyToVectorMappingEstimator.ColumnInfo("CatB", "TermB", true) + new KeyToVectorMappingEstimator.ColumnOptions("CatA", "TermA",false), + new KeyToVectorMappingEstimator.ColumnOptions("CatB", "TermB", true) ); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); diff --git a/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs b/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs index 998a265d30..d9a7028e4a 100644 --- a/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs @@ -46,7 +46,7 @@ public void NAIndicatorWorkout() }; var dataView = ML.Data.ReadFromEnumerable(data); - var pipe = ML.Transforms.IndicateMissingValues(new SimpleColumnInfo[] { ("NAA", "A"), ("NAB", "B"), ("NAC", "C"), ("NAD", "D") }); + var pipe = ML.Transforms.IndicateMissingValues(new ColumnOptions[] { ("NAA", "A"), ("NAB", "B"), ("NAC", "C"), ("NAD", "D") }); TestEstimatorCore(pipe, dataView); Done(); } @@ -69,7 +69,7 @@ public void TestOldSavingAndLoading() }; var dataView = ML.Data.ReadFromEnumerable(data); - var pipe = ML.Transforms.IndicateMissingValues(new SimpleColumnInfo[] { ("NAA", "A"), ("NAB", "B"), ("NAC", "C"), ("NAD", "D") }); + var pipe = ML.Transforms.IndicateMissingValues(new ColumnOptions[] { ("NAA", "A"), ("NAB", "B"), ("NAC", "C"), ("NAD", "D") }); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); using (var ms = new MemoryStream()) @@ -94,7 +94,7 @@ public void NAIndicatorFileOutput() var data = reader.Read(new MultiFileSource(dataPath)).AsDynamic; var wrongCollection = new[] { new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } } }; var invalidData = ML.Data.ReadFromEnumerable(wrongCollection); - var est = ML.Transforms.IndicateMissingValues(new SimpleColumnInfo[] + var est = ML.Transforms.IndicateMissingValues(new ColumnOptions[] { ("A", "ScalarFloat"), ("B", "ScalarDouble"), ("C", "VectorFloat"), ("D", "VectorDoulbe") diff --git a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs index 2e1ac6b36e..152696b73f 100644 --- a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs @@ -43,10 +43,10 @@ public void NAReplaceWorkout() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.ReplaceMissingValues( - new MissingValueReplacingEstimator.ColumnInfo("NAA", "A", MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnInfo("NAB", "B", MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnInfo("NAC", "C", MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnInfo("NAD", "D", MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean)); + new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), + new 
MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean)); TestEstimatorCore(pipe, dataView); Done(); } @@ -68,10 +68,10 @@ public void NAReplaceStatic() var est = data.MakeNewEstimator(). Append(row => ( - A: row.ScalarFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Maximum), - B: row.ScalarDouble.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean), - C: row.VectorFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean), - D: row.VectorDoulbe.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Minimum) + A: row.ScalarFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Maximum), + B: row.ScalarDouble.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), + C: row.VectorFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), + D: row.VectorDoulbe.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Minimum) )); TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData); @@ -104,10 +104,10 @@ public void TestOldSavingAndLoading() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.ReplaceMissingValues( - new MissingValueReplacingEstimator.ColumnInfo("NAA", "A", MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnInfo("NAB", "B", MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnInfo("NAC", "C", MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnInfo("NAD", "D", MissingValueReplacingEstimator.ColumnInfo.ReplacementMode.Mean)); + new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean)); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index cd84e9a058..ae9c11e2bc 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -46,26 +46,26 @@ public void NormalizerWorkout() }, new MultiFileSource(dataPath)); var est = new NormalizingEstimator(Env, - new NormalizingEstimator.MinMaxColumn("float1"), - new NormalizingEstimator.MinMaxColumn("float4"), - new NormalizingEstimator.MinMaxColumn("double1"), - new NormalizingEstimator.MinMaxColumn("double4"), - new NormalizingEstimator.BinningColumn("float1bin", "float1"), - new NormalizingEstimator.BinningColumn("float4bin", "float4"), - new NormalizingEstimator.BinningColumn("double1bin", "double1"), - new NormalizingEstimator.BinningColumn("double4bin", "double4"), - new 
NormalizingEstimator.SupervisedBinningColumn("float1supervisedbin", "float1", labelColumn: "int1"), - new NormalizingEstimator.SupervisedBinningColumn("float4supervisedbin", "float4", labelColumn: "int1"), - new NormalizingEstimator.SupervisedBinningColumn("double1supervisedbin", "double1", labelColumn: "int1"), - new NormalizingEstimator.SupervisedBinningColumn("double4supervisedbin", "double4", labelColumn: "int1"), - new NormalizingEstimator.MeanVarColumn("float1mv", "float1"), - new NormalizingEstimator.MeanVarColumn("float4mv", "float4"), - new NormalizingEstimator.MeanVarColumn("double1mv", "double1"), - new NormalizingEstimator.MeanVarColumn("double4mv", "double4"), - new NormalizingEstimator.LogMeanVarColumn("float1lmv", "float1"), - new NormalizingEstimator.LogMeanVarColumn("float4lmv", "float4"), - new NormalizingEstimator.LogMeanVarColumn("double1lmv", "double1"), - new NormalizingEstimator.LogMeanVarColumn("double4lmv", "double4")); + new NormalizingEstimator.MinMaxColumnOptions("float1"), + new NormalizingEstimator.MinMaxColumnOptions("float4"), + new NormalizingEstimator.MinMaxColumnOptions("double1"), + new NormalizingEstimator.MinMaxColumnOptions("double4"), + new NormalizingEstimator.BinningColumnOptions("float1bin", "float1"), + new NormalizingEstimator.BinningColumnOptions("float4bin", "float4"), + new NormalizingEstimator.BinningColumnOptions("double1bin", "double1"), + new NormalizingEstimator.BinningColumnOptions("double4bin", "double4"), + new NormalizingEstimator.SupervisedBinningColumOptions("float1supervisedbin", "float1", labelColumn: "int1"), + new NormalizingEstimator.SupervisedBinningColumOptions("float4supervisedbin", "float4", labelColumn: "int1"), + new NormalizingEstimator.SupervisedBinningColumOptions("double1supervisedbin", "double1", labelColumn: "int1"), + new NormalizingEstimator.SupervisedBinningColumOptions("double4supervisedbin", "double4", labelColumn: "int1"), + new NormalizingEstimator.MeanVarColumnOptions("float1mv", "float1"), + new NormalizingEstimator.MeanVarColumnOptions("float4mv", "float4"), + new NormalizingEstimator.MeanVarColumnOptions("double1mv", "double1"), + new NormalizingEstimator.MeanVarColumnOptions("double4mv", "double4"), + new NormalizingEstimator.LogMeanVarColumnOptions("float1lmv", "float1"), + new NormalizingEstimator.LogMeanVarColumnOptions("float4lmv", "float4"), + new NormalizingEstimator.LogMeanVarColumnOptions("double1lmv", "double1"), + new NormalizingEstimator.LogMeanVarColumnOptions("double4lmv", "double4")); var data = loader.Read(dataPath); @@ -111,22 +111,22 @@ public void NormalizerParameters() }, new MultiFileSource(dataPath)); var est = new NormalizingEstimator(Env, - new NormalizingEstimator.MinMaxColumn("float1"), - new NormalizingEstimator.MinMaxColumn("float4"), - new NormalizingEstimator.MinMaxColumn("double1"), - new NormalizingEstimator.MinMaxColumn("double4"), - new NormalizingEstimator.BinningColumn("float1bin", "float1"), - new NormalizingEstimator.BinningColumn("float4bin", "float4"), - new NormalizingEstimator.BinningColumn("double1bin", "double1"), - new NormalizingEstimator.BinningColumn("double4bin", "double4"), - new NormalizingEstimator.MeanVarColumn("float1mv", "float1"), - new NormalizingEstimator.MeanVarColumn("float4mv", "float4"), - new NormalizingEstimator.MeanVarColumn("double1mv", "double1"), - new NormalizingEstimator.MeanVarColumn("double4mv", "double4"), - new NormalizingEstimator.LogMeanVarColumn("float1lmv", "float1"), - new NormalizingEstimator.LogMeanVarColumn("float4lmv", 
"float4"), - new NormalizingEstimator.LogMeanVarColumn("double1lmv", "double1"), - new NormalizingEstimator.LogMeanVarColumn("double4lmv", "double4")); + new NormalizingEstimator.MinMaxColumnOptions("float1"), + new NormalizingEstimator.MinMaxColumnOptions("float4"), + new NormalizingEstimator.MinMaxColumnOptions("double1"), + new NormalizingEstimator.MinMaxColumnOptions("double4"), + new NormalizingEstimator.BinningColumnOptions("float1bin", "float1"), + new NormalizingEstimator.BinningColumnOptions("float4bin", "float4"), + new NormalizingEstimator.BinningColumnOptions("double1bin", "double1"), + new NormalizingEstimator.BinningColumnOptions("double4bin", "double4"), + new NormalizingEstimator.MeanVarColumnOptions("float1mv", "float1"), + new NormalizingEstimator.MeanVarColumnOptions("float4mv", "float4"), + new NormalizingEstimator.MeanVarColumnOptions("double1mv", "double1"), + new NormalizingEstimator.MeanVarColumnOptions("double4mv", "double4"), + new NormalizingEstimator.LogMeanVarColumnOptions("float1lmv", "float1"), + new NormalizingEstimator.LogMeanVarColumnOptions("float4lmv", "float4"), + new NormalizingEstimator.LogMeanVarColumnOptions("double1lmv", "double1"), + new NormalizingEstimator.LogMeanVarColumnOptions("double4lmv", "double4")); var data = loader.Read(dataPath); @@ -226,7 +226,7 @@ public void SimpleConstructorsAndExtensions() var est1 = new NormalizingEstimator(Env, "float4"); var est2 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizerMode.MinMax, ("float4", "float4")); - var est3 = new NormalizingEstimator(Env, new NormalizingEstimator.MinMaxColumn("float4")); + var est3 = new NormalizingEstimator(Env, new NormalizingEstimator.MinMaxColumnOptions("float4")); var est4 = ML.Transforms.Normalize(NormalizingEstimator.NormalizerMode.MinMax, ("float4", "float4")); var est5 = ML.Transforms.Normalize("float4"); @@ -247,7 +247,7 @@ public void SimpleConstructorsAndExtensions() // Tests for SupervisedBinning var est6 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizerMode.SupervisedBinning, ("float4", "float4")); - var est7 = new NormalizingEstimator(Env, new NormalizingEstimator.SupervisedBinningColumn("float4")); + var est7 = new NormalizingEstimator(Env, new NormalizingEstimator.SupervisedBinningColumOptions("float4")); var est8 = ML.Transforms.Normalize(NormalizingEstimator.NormalizerMode.SupervisedBinning, ("float4", "float4")); var data6 = est6.Fit(data).Transform(data); diff --git a/test/Microsoft.ML.Tests/Transformers/RffTests.cs b/test/Microsoft.ML.Tests/Transformers/RffTests.cs index dec7254102..e375583b46 100644 --- a/test/Microsoft.ML.Tests/Transformers/RffTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/RffTests.cs @@ -53,8 +53,8 @@ public void RffWorkout() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = ML.Transforms.Projection.CreateRandomFourierFeatures(new[]{ - new RandomFourierFeaturizingEstimator.ColumnInfo("RffA", 5, false, "A"), - new RandomFourierFeaturizingEstimator.ColumnInfo("RffB", 10, true, "A", new LaplacianKernel()) + new RandomFourierFeaturizingEstimator.ColumnOptions("RffA", 5, false, "A"), + new RandomFourierFeaturizingEstimator.ColumnOptions("RffB", 10, true, "A", new LaplacianKernel()) }); TestEstimatorCore(pipe, dataView, invalidInput: invalidData, validForFitNotValidForTransformInput: validFitInvalidData); @@ -103,8 +103,8 @@ public void TestOldSavingAndLoading() var dataView = ML.Data.ReadFromEnumerable(data); var est = ML.Transforms.Projection.CreateRandomFourierFeatures(new[]{ - new 
RandomFourierFeaturizingEstimator.ColumnInfo("RffA", 5, false, "A"), - new RandomFourierFeaturizingEstimator.ColumnInfo("RffB", 10, true, "A", new LaplacianKernel()) + new RandomFourierFeaturizingEstimator.ColumnOptions("RffA", 5, false, "A"), + new RandomFourierFeaturizingEstimator.ColumnOptions("RffB", 10, true, "A", new LaplacianKernel()) }); var result = est.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); diff --git a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs index 2ddb8b3895..e29d1e8a4b 100644 --- a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs @@ -174,7 +174,7 @@ public void StopWordsRemoverFromFactory() var tokenized = new WordTokenizingTransformer(ML, new[] { - new WordTokenizingEstimator.ColumnInfo("Text", "Text") + new WordTokenizingEstimator.ColumnOptions("Text", "Text") }).Transform(data); var xf = factory.CreateComponent(ML, tokenized, diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 06ead6add2..dc35d9f444 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -87,7 +87,7 @@ public void ValueMapInputIsVectorTest() var values = new List() { 1, 2, 3, 4 }; var estimator = new WordTokenizingEstimator(Env, new[]{ - new WordTokenizingEstimator.ColumnInfo("TokenizeA", "A") + new WordTokenizingEstimator.ColumnOptions("TokenizeA", "A") }).Append(new ValueMappingEstimator, int>(Env, keys, values, new[] { ("VecD", "TokenizeA"), ("E", "B"), ("F", "C") })); var schema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema)); Assert.True(schema.TryFindColumn("VecD", out var originalColumn)); @@ -123,7 +123,7 @@ public void ValueMapInputIsVectorAndValueAsStringKeyTypeTest() var values = new List>() { "a".AsMemory(), "b".AsMemory(), "c".AsMemory(), "d".AsMemory() }; var estimator = new WordTokenizingEstimator(Env, new[]{ - new WordTokenizingEstimator.ColumnInfo("TokenizeA", "A") + new WordTokenizingEstimator.ColumnOptions("TokenizeA", "A") }).Append(new ValueMappingEstimator, ReadOnlyMemory>(Env, keys, values, true, new[] { ("VecD", "TokenizeA"), ("E", "B"), ("F", "C") })); var t = estimator.Fit(dataView); @@ -512,7 +512,7 @@ public void ValueMappingWorkout() var values = new List() { 1, 2, 3, 4 }; // Workout on value mapping - var est = ML.Transforms.Conversion.ValueMap(keys, values, new SimpleColumnInfo[] { ("D", "A"), ("E", "B"), ("F", "C") }); + var est = ML.Transforms.Conversion.ValueMap(keys, values, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } @@ -531,7 +531,7 @@ public void ValueMappingValueTypeIsVectorWorkout() new int[] {400, 500, 600, 700 }}; // Workout on value mapping - var est = ML.Transforms.Conversion.ValueMap(keys, values, new SimpleColumnInfo[] { ("D", "A"), ("E", "B"), ("F", "C") }); + var est = ML.Transforms.Conversion.ValueMap(keys, values, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } @@ -548,7 +548,7 @@ public void ValueMappingInputIsVectorWorkout() var values = new List() { 1, 2, 3, 4 }; var est = ML.Transforms.Text.TokenizeWords("TokenizeB", "B") - .Append(ML.Transforms.Conversion.ValueMap(keys, values, new 
SimpleColumnInfo[] { ("VecB", "TokenizeB") })); + .Append(ML.Transforms.Conversion.ValueMap(keys, values, new ColumnOptions[] { ("VecB", "TokenizeB") })); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } diff --git a/test/Microsoft.ML.Tests/Transformers/WordTokenizeTests.cs b/test/Microsoft.ML.Tests/Transformers/WordTokenizeTests.cs index 397dbf50ae..ba57e91b59 100644 --- a/test/Microsoft.ML.Tests/Transformers/WordTokenizeTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/WordTokenizeTests.cs @@ -57,8 +57,8 @@ public void WordTokenizeWorkout() var invalidData = new[] { new TestWrong() { A =1, B = new float[2] { 2,3 } } }; var invalidDataView = ML.Data.ReadFromEnumerable(invalidData); var pipe = new WordTokenizingEstimator(Env, new[]{ - new WordTokenizingEstimator.ColumnInfo("TokenizeA", "A"), - new WordTokenizingEstimator.ColumnInfo("TokenizeB", "B"), + new WordTokenizingEstimator.ColumnOptions("TokenizeA", "A"), + new WordTokenizingEstimator.ColumnOptions("TokenizeB", "B"), }); TestEstimatorCore(pipe, dataView, invalidInput: invalidDataView); @@ -99,8 +99,8 @@ public void TestOldSavingAndLoading() var dataView = ML.Data.ReadFromEnumerable(data); var pipe = new WordTokenizingEstimator(Env, new[]{ - new WordTokenizingEstimator.ColumnInfo("TokenizeA", "A"), - new WordTokenizingEstimator.ColumnInfo("TokenizeB", "B"), + new WordTokenizingEstimator.ColumnOptions("TokenizeA", "A"), + new WordTokenizingEstimator.ColumnOptions("TokenizeB", "B"), }); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); From 6aa991173819e6cf4d524398e843bb92b6d4f0d2 Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Mon, 25 Feb 2019 09:51:17 -0800 Subject: [PATCH 2/3] fixed some renamings --- ...nfo.cs => VectorWhitenWithColumnOptions.cs} | 0 .../CountFeatureSelection.cs | 18 +++++++++--------- .../HashJoiningTransform.cs | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-) rename docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/{VectorWhitenWithColumnInfo.cs => VectorWhitenWithColumnOptions.cs} (100%) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnInfo.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs similarity index 100% rename from docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnInfo.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs index a1f3f4c2ee..4e719a4cd9 100644 --- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs @@ -183,18 +183,18 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa host.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns)); host.CheckUserArg(options.Count > 0, nameof(options.Count)); - var columnOptionss = options.Columns.Select(inColName => new ColumnOptions(inColName, minCount: options.Count)).ToArray(); + var columnOptions = options.Columns.Select(inColName => new ColumnOptions(inColName, minCount: options.Count)).ToArray(); - return new CountFeatureSelectingEstimator(env, columnOptionss).Fit(input).Transform(input) as IDataTransform; + return new CountFeatureSelectingEstimator(env, columnOptions).Fit(input).Transform(input) as IDataTransform; } 
-        private static void CreateDropAndCopyColumns(ColumnOptions[] columnOptionss, int size, long[][] scores,
+        private static void CreateDropAndCopyColumns(ColumnOptions[] columnOptions, int size, long[][] scores,
             out int[] selectedCount, out SlotsDroppingTransformer.ColumnOptions[] dropSlotsColumns, out (string outputColumnName, string inputColumnName)[] copyColumnsPairs)
         {
             Contracts.Assert(size > 0);
             Contracts.Assert(Utils.Size(scores) == size);
-            Contracts.AssertValue(columnOptionss);
-            Contracts.Assert(Utils.Size(columnOptionss) == size);
+            Contracts.AssertValue(columnOptions);
+            Contracts.Assert(Utils.Size(columnOptions) == size);
             selectedCount = new int[scores.Length];
             var dropSlotsCols = new List();
@@ -206,11 +206,11 @@ private static void CreateDropAndCopyColumns(ColumnOptions[] columnOptionss, int
                 selectedCount[i] = 0;
                 for (int j = 0; j < score.Length; j++)
                 {
-                    if (score[j] < columnOptionss[i].MinCount)
+                    if (score[j] < columnOptions[i].MinCount)
                     {
                         // Adjacent slots are combined into a single range.
                         int min = j;
-                        while (j < score.Length && score[j] < columnOptionss[i].MinCount)
+                        while (j < score.Length && score[j] < columnOptions[i].MinCount)
                             j++;
                         int max = j - 1;
                         slots.Add((min, max));
@@ -221,9 +221,9 @@ private static void CreateDropAndCopyColumns(ColumnOptions[] columnOptionss, int
                         selectedCount[i]++;
                 }
                 if (slots.Count <= 0)
-                    copyCols.Add((columnOptionss[i].Name, columnOptionss[i].InputColumnName));
+                    copyCols.Add((columnOptions[i].Name, columnOptions[i].InputColumnName));
                 else
-                    dropSlotsCols.Add(new SlotsDroppingTransformer.ColumnOptions(columnOptionss[i].Name, columnOptionss[i].InputColumnName, slots.ToArray()));
+                    dropSlotsCols.Add(new SlotsDroppingTransformer.ColumnOptions(columnOptions[i].Name, columnOptions[i].InputColumnName, slots.ToArray()));
             }
             dropSlotsColumns = dropSlotsCols.ToArray();
             copyColumnsPairs = copyCols.ToArray();
diff --git a/src/Microsoft.ML.Transforms/HashJoiningTransform.cs b/src/Microsoft.ML.Transforms/HashJoiningTransform.cs
index a59046e714..30a93c8107 100644
--- a/src/Microsoft.ML.Transforms/HashJoiningTransform.cs
+++ b/src/Microsoft.ML.Transforms/HashJoiningTransform.cs
@@ -106,7 +106,7 @@ internal bool TryUnparse(StringBuilder sb)
             }
         }
-        public sealed class ColumnOptionsEx
+        public sealed class ColumnOptions
         {
             // Either VBuffer> or a single Key.
             // Note that if CustomSlotMap contains only one array, the output type of the transform will a single Key.
@@ -124,7 +124,7 @@ public int OutputValueCount
                 get { return OutputColumnType.GetValueCount(); }
             }
-            public ColumnOptionsEx(int[][] slotMap, int hashBits, uint hashSeed, bool ordered)
+            public ColumnOptions(int[][] slotMap, int hashBits, uint hashSeed, bool ordered)
             {
                 Contracts.CheckValueOrNull(slotMap);
                 Contracts.Check(NumBitsMin <= hashBits && hashBits < NumBitsLim);
@@ -173,7 +173,7 @@ private static VersionInfo GetVersionInfo()
                 loaderAssemblyName: typeof(HashJoiningTransform).Assembly.FullName);
         }
-        private readonly ColumnOptionsEx[] _exes;
+        private readonly ColumnOptions[] _exes;
         ///
         /// Initializes a new instance of .
@@ -204,7 +204,7 @@ public HashJoiningTransform(IHostEnvironment env, Arguments args, IDataView inpu
             if (args.HashBits < NumBitsMin || args.HashBits >= NumBitsLim)
                 throw Host.ExceptUserArg(nameof(args.HashBits), "hashBits should be between {0} and {1} inclusive", NumBitsMin, NumBitsLim - 1);
-            _exes = new ColumnOptionsEx[Infos.Length];
+            _exes = new ColumnOptions[Infos.Length];
             for (int i = 0; i < Infos.Length; i++)
             {
                 var hashBits = args.Columns[i].HashBits ?? args.HashBits;
@@ -238,7 +238,7 @@ private HashJoiningTransform(IHost host, ModelLoadContext ctx, IDataView input)
             Host.AssertNonEmpty(Infos);
-            _exes = new ColumnOptionsEx[Infos.Length];
+            _exes = new ColumnOptions[Infos.Length];
             for (int i = 0; i < Infos.Length; i++)
             {
                 int hashBits = ctx.Reader.ReadInt32();
@@ -268,7 +268,7 @@ private HashJoiningTransform(IHost host, ModelLoadContext ctx, IDataView input)
                     }
                 }
-                _exes[i] = new ColumnOptionsEx(slotMap, hashBits, hashSeed, ordered);
+                _exes[i] = new ColumnOptions(slotMap, hashBits, hashSeed, ordered);
             }
             SetMetadata();
@@ -327,7 +327,7 @@ private protected override void SaveModel(ModelSaveContext ctx)
             }
         }
-        private ColumnOptionsEx CreateColumnOptionsEx(bool join, string customSlotMap, int hashBits, uint hashSeed, bool ordered, ColInfo colInfo)
+        private ColumnOptions CreateColumnOptionsEx(bool join, string customSlotMap, int hashBits, uint hashSeed, bool ordered, ColInfo colInfo)
         {
             int[][] slotMap = null;
             if (colInfo.TypeSrc is VectorType vectorType)
@@ -340,7 +340,7 @@ private ColumnOptionsEx CreateColumnOptionsEx(bool join, string customSlotMap, i
                 Host.Assert(Utils.Size(slotMap) >= 1);
             }
-            return new ColumnOptionsEx(slotMap, hashBits, hashSeed, ordered);
+            return new ColumnOptions(slotMap, hashBits, hashSeed, ordered);
         }
         private int[][] CompileSlotMap(string slotMapString, int srcSlotCount)

From 0ed9ec2fa53289c852325d70c17c666fd3077ac2 Mon Sep 17 00:00:00 2001
From: Artidoro Pagnoni
Date: Mon, 25 Feb 2019 21:02:17 -0800
Subject: [PATCH 3/3] cookbook and revert datadebuggerpreview change

---
 docs/code/MlNetCookBook.md                   | 6 +++---
 src/Microsoft.ML.Data/DataDebuggerPreview.cs | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md
index 7bdf991a6d..047bb2371f 100644
--- a/docs/code/MlNetCookBook.md
+++ b/docs/code/MlNetCookBook.md
@@ -629,9 +629,9 @@ var trainData = mlContext.Data.ReadFromTextFile(dataPath,
 // Apply all kinds of standard ML.NET normalization to the raw features.
 var pipeline = mlContext.Transforms.Normalize(
-    new NormalizingEstimator.MinMaxColumn("MinMaxNormalized", "Features", fixZero: true),
-    new NormalizingEstimator.MeanVarColumn("MeanVarNormalized", "Features", fixZero: true),
-    new NormalizingEstimator.BinningColumn("BinNormalized", "Features", numBins: 256));
+    new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true),
+    new NormalizingEstimator.MeanVarColumnOptions("MeanVarNormalized", "Features", fixZero: true),
+    new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", numBins: 256));
 // Let's train our pipeline of normalizers, and then apply it to the same data.
 var normalizedData = pipeline.Fit(trainData).Transform(trainData);
diff --git a/src/Microsoft.ML.Data/DataDebuggerPreview.cs b/src/Microsoft.ML.Data/DataDebuggerPreview.cs
index 988f4af06a..2a8210bc7c 100644
--- a/src/Microsoft.ML.Data/DataDebuggerPreview.cs
+++ b/src/Microsoft.ML.Data/DataDebuggerPreview.cs
@@ -22,7 +22,7 @@ internal static class Defaults
         }
         public DataViewSchema Schema { get; }
-        public ImmutableArray ColumnView { get; }
+        public ImmutableArray ColumnView { get; }
         public ImmutableArray RowView { get; }
         internal DataDebuggerPreview(IDataView data, int maxRows = Defaults.MaxRows)
@@ -56,7 +56,7 @@ internal DataDebuggerPreview(IDataView data, int maxRows = Defaults.MaxRows)
                 }
             }
             RowView = rows.ToImmutableArray();
-            ColumnView = Enumerable.Range(0, n).Select(c => new ColumnOptions(data.Schema[c], columns[c].ToArray())).ToImmutableArray();
+            ColumnView = Enumerable.Range(0, n).Select(c => new ColumnInfo(data.Schema[c], columns[c].ToArray())).ToImmutableArray();
         }
         public override string ToString()
@@ -94,14 +94,14 @@ internal RowInfo(int n)
             }
         }
-        public sealed class ColumnOptions
+        public sealed class ColumnInfo
         {
             public DataViewSchema.Column Column { get; }
             public object[] Values { get; }
             public override string ToString() => $"{Column.Name}: {Column.Type}";
-            internal ColumnOptions(DataViewSchema.Column column, object[] values)
+            internal ColumnInfo(DataViewSchema.Column column, object[] values)
             {
                 Column = column;
                 Values = values;
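
For readers updating their own pipelines against this rename, the short sketch below pulls the new spellings together in one place. It is not part of the patch: it only uses calls that already appear in the tests and cookbook hunks above (mlContext.Transforms.Normalize with per-column options, mlContext.Transforms.Conversion.Hash, ReadFromEnumerable, Fit/Transform). The InputRow type, the column names, and the namespace usings are illustrative assumptions and may need adjusting for the exact ML.NET build this PR targets; DataDebuggerPreview is left out on purpose because the final hunk above reverts it back to the nested ColumnInfo type.

    using System.Collections.Generic;
    using Microsoft.ML;
    // Namespace assumptions for the nested option types; adjust to the ML.NET version in use.
    using Microsoft.ML.Transforms.Conversions;
    using Microsoft.ML.Transforms.Normalizers;

    public static class ColumnOptionsRenameSketch
    {
        // Placeholder input schema, used only for illustration.
        public sealed class InputRow
        {
            public float Feature { get; set; }
            public string Category { get; set; }
        }

        public static void Example(MLContext mlContext)
        {
            var samples = new List<InputRow>
            {
                new InputRow { Feature = 1.0f, Category = "A" },
                new InputRow { Feature = 2.5f, Category = "B" },
            };
            var data = mlContext.Data.ReadFromEnumerable(samples);

            // Per-column settings now use the *ColumnOptions types
            // (formerly NormalizingEstimator.MinMaxColumn / MeanVarColumn and HashingEstimator.ColumnInfo).
            var pipeline = mlContext.Transforms.Normalize(
                    new NormalizingEstimator.MinMaxColumnOptions("FeatureMinMax", "Feature", fixZero: true),
                    new NormalizingEstimator.MeanVarColumnOptions("FeatureMeanVar", "Feature", fixZero: true))
                .Append(mlContext.Transforms.Conversion.Hash(new[]
                {
                    new HashingEstimator.ColumnOptions("CategoryHash", "Category", hashBits: 10)
                }));

            var transformed = pipeline.Fit(data).Transform(data);
        }
    }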