From c527c218b93f83bc47b7cae60f8d037e72b9c6c5 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Tue, 27 Apr 2021 01:12:40 +0300 Subject: [PATCH 1/6] fix #5767 issue with DataFrame Merge method --- src/Microsoft.Data.Analysis/DataFrame.Join.cs | 17 ++++++++--------- .../DataFrameTests.cs | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.Join.cs b/src/Microsoft.Data.Analysis/DataFrame.Join.cs index d5a1278371..381268dee2 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.Join.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.Join.cs @@ -252,9 +252,9 @@ public DataFrame Merge(DataFrame other, string leftJoinColumn, string righ // Hash the column with the smaller RowCount long leftRowCount = Rows.Count; long rightRowCount = other.Rows.Count; - DataFrame longerDataFrame = leftRowCount <= rightRowCount ? other : this; - DataFrame shorterDataFrame = ReferenceEquals(longerDataFrame, this) ? other : this; - DataFrameColumn hashColumn = (leftRowCount <= rightRowCount) ? Columns[leftJoinColumn] : other.Columns[rightJoinColumn]; + + var leftColumnIsSmaller = (leftRowCount <= rightRowCount); + DataFrameColumn hashColumn = leftColumnIsSmaller ? Columns[leftJoinColumn] : other.Columns[rightJoinColumn]; DataFrameColumn otherColumn = ReferenceEquals(hashColumn, Columns[leftJoinColumn]) ? other.Columns[rightJoinColumn] : Columns[leftJoinColumn]; Dictionary> multimap = hashColumn.GroupColumnValues(); @@ -270,23 +270,21 @@ public DataFrame Merge(DataFrame other, string leftJoinColumn, string righ { if (hashColumn[row] == null) { - leftRowIndices.Append(row); - rightRowIndices.Append(i); + leftRowIndices.Append(leftColumnIsSmaller ? row : i); + rightRowIndices.Append(leftColumnIsSmaller ? i : row); } } else { if (hashColumn[row] != null) { - leftRowIndices.Append(row); - rightRowIndices.Append(i); + leftRowIndices.Append(leftColumnIsSmaller ? row : i); + rightRowIndices.Append(leftColumnIsSmaller ? i : row); } } } } } - leftDataFrame = shorterDataFrame; - rightDataFrame = longerDataFrame; } else if (joinAlgorithm == JoinAlgorithm.FullOuter) { @@ -366,4 +364,5 @@ public DataFrame Merge(DataFrame other, string leftJoinColumn, string righ } } + } diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index 300babbffb..72072fd533 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -1579,6 +1579,25 @@ public void TestSample() Assert.Throws(()=> df.Sample(13)); } + [Theory] + [InlineData(1, 2)] + [InlineData(2, 1)] + public void TestDataCorrectnessForInnerMerge(int leftCount, int rightCount) + { + DataFrame left = MakeDataFrameWithNumericColumns(leftCount, false); + DataFrameColumn leftStringColumn = new StringDataFrameColumn("String", Enumerable.Range(0, leftCount).Select(x => "Left")); + left.Columns.Insert(left.Columns.Count, leftStringColumn); + + DataFrame right = MakeDataFrameWithNumericColumns(rightCount, false); + DataFrameColumn rightStringColumn = new StringDataFrameColumn("String", Enumerable.Range(0, rightCount).Select(x => "Right")); + right.Columns.Insert(right.Columns.Count, rightStringColumn); + + DataFrame merge = left.Merge(right, "Int", "Int", joinAlgorithm: JoinAlgorithm.Inner); + + Assert.Equal("Left", (string)merge.Columns["String_left"][0]); + Assert.Equal("Right", (string)merge.Columns["String_right"][0]); + } + [Fact] public void TestMerge() { From 05807104b471661aba732af88599372423f7bdb5 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Fri, 28 May 2021 22:54:36 +0300 Subject: [PATCH 2/6] #5820 Extend DataFrame GroupBy operations --- src/Microsoft.Data.Analysis/DataFrame.cs | 22 ++++ src/Microsoft.Data.Analysis/GroupBy.cs | 11 +- src/Microsoft.Data.Analysis/Grouping.cs | 32 ++++++ src/Microsoft.Data.Analysis/Strings.resx | 3 + .../DataFrameGroupByTests.cs | 107 ++++++++++++++++++ 5 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 src/Microsoft.Data.Analysis/Grouping.cs create mode 100644 test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs diff --git a/src/Microsoft.Data.Analysis/DataFrame.cs b/src/Microsoft.Data.Analysis/DataFrame.cs index 8eb04797aa..ea3ded0f83 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.cs @@ -367,6 +367,28 @@ public GroupBy GroupBy(string columnName) DataFrameColumn column = _columnCollection[columnIndex]; return column.GroupBy(columnIndex, this); } + + /// + /// Groups the rows of the by unique values in the column. + /// + /// Type of column used for grouping + /// The column used to group unique values + /// A GroupBy object that stores the group information. + public GroupBy GroupBy(string columnName) + { + int columnIndex = _columnCollection.IndexOf(columnName); + if (columnIndex == -1) + throw new ArgumentException(String.Format(Strings.InvalidColumnName, columnName), nameof(columnName)); + + DataFrameColumn column = _columnCollection[columnIndex]; + + var group = column.GroupBy(columnIndex, this) as GroupBy; + + if (group == null) + throw new InvalidCastException(String.Format(Strings.BadColumnCast, columnName, column.DataType, typeof(TKey))); + + return group; + } // In GroupBy and ReadCsv calls, columns get resized. We need to set the RowCount to reflect the true Length of the DataFrame. This does internal validation internal void SetTableRowCount(long rowCount) diff --git a/src/Microsoft.Data.Analysis/GroupBy.cs b/src/Microsoft.Data.Analysis/GroupBy.cs index 5d8013e9b6..acbdd3cbd7 100644 --- a/src/Microsoft.Data.Analysis/GroupBy.cs +++ b/src/Microsoft.Data.Analysis/GroupBy.cs @@ -3,7 +3,9 @@ // See the LICENSE file in the project root for more information. using System; +using System.Collections; using System.Collections.Generic; +using System.Linq; namespace Microsoft.Data.Analysis { @@ -70,7 +72,7 @@ public abstract class GroupBy public abstract DataFrame Mean(params string[] columnNames); } - public class GroupBy : GroupBy + public class GroupBy : GroupBy { private int _groupByColumnIndex; private IDictionary> _keyToRowIndicesMap; @@ -464,5 +466,12 @@ public override DataFrame Mean(params string[] columnNames) return ret; } + public IEnumerable> Groupings + { + get + { + return _keyToRowIndicesMap.Select(kvp => new Grouping(kvp.Key, kvp.Value.Select(index => _dataFrame.Rows[index]).ToArray())); + } + } } } diff --git a/src/Microsoft.Data.Analysis/Grouping.cs b/src/Microsoft.Data.Analysis/Grouping.cs new file mode 100644 index 0000000000..fcbd037568 --- /dev/null +++ b/src/Microsoft.Data.Analysis/Grouping.cs @@ -0,0 +1,32 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Microsoft.Data.Analysis +{ + public class Grouping : IGrouping + { + private readonly TKey _key; + private readonly ICollection _rows; + + public Grouping(TKey key, ICollection rows) + { + _key = key; + _rows = rows; + } + + public TKey Key => _key; + + public IEnumerator GetEnumerator() + { + return _rows.GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return _rows.GetEnumerator(); + } + } +} diff --git a/src/Microsoft.Data.Analysis/Strings.resx b/src/Microsoft.Data.Analysis/Strings.resx index de91078cec..db6dfb4984 100644 --- a/src/Microsoft.Data.Analysis/Strings.resx +++ b/src/Microsoft.Data.Analysis/Strings.resx @@ -120,6 +120,9 @@ Cannot cast column holding {0} values to type {1} + + Cannot cast elements of column '{0}' type of {1} to type {2} used as TKey in grouping + Line {0} cannot be parsed with the current Delimiters. diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs new file mode 100644 index 0000000000..91325338bb --- /dev/null +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs @@ -0,0 +1,107 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Xunit; + +namespace Microsoft.Data.Analysis.Tests +{ + public class DataFrameGroupByTests + { + [Fact] + public void TestGroupingWithTKeyTypeofString() + { + int lenght = 11; + + //Create test dataframe (numbers starting from 0 up to lenght) + DataFrame df = MakeTestDataFrame(lenght); + + var grouping = df.GroupBy("Parity").Groupings; + + //Check groups count + Assert.Equal(2, grouping.Count()); + + //Check number of elements in each group + var oddGroup = grouping.Where(gr => gr.Key == "odd").FirstOrDefault(); + Assert.NotNull(oddGroup); + Assert.Equal(lenght/2, oddGroup.Count()); + + var evenGroup = grouping.Where(gr => gr.Key == "even").FirstOrDefault(); + Assert.NotNull(evenGroup); + Assert.Equal(lenght / 2 + lenght % 2, evenGroup.Count()); + + //Check corner cases + lenght = 0; + df = MakeTestDataFrame(lenght); + grouping = df.GroupBy("Parity").Groupings; + Assert.Empty(grouping); + + lenght = 1; + df = MakeTestDataFrame(lenght); + grouping = df.GroupBy("Parity").Groupings; + Assert.Single(grouping); + Assert.Equal("even", grouping.First().Key); + } + + [Fact] + public void TestGroupingWithTKeyPrimitiveType() + { + const int lenght = 55; + + //Create test dataframe (numbers starting from 0 up to lenght) + DataFrame df = MakeTestDataFrame(lenght); + + //Group elements by int column, that contain the amount of full tens in each int + var groupings = df.GroupBy("Tens").Groupings.ToDictionary(g => g.Key, g => g.ToList()); + + //Get the amount of all number based columns + int numberColumnsCount = df.Columns.Count - 2; //except "Parity" and "Tens" columns + + //Check each group + for (int i = 0; i < lenght / 10; i++) + { + Assert.Equal(10, groupings[i].Count()); + + var rows = groupings[i]; + for (int colIndex = 0; colIndex < numberColumnsCount; colIndex++) + { + var values = rows.Select(row => Convert.ToInt32(row[colIndex])); + + for (int j = 0; j < 10; j++) + { + Assert.Contains(i * 10 + j, values); + } + } + } + + //Last group should contain smaller amount of items + Assert.Equal(lenght % 10, groupings[lenght / 10].Count()); + } + + [Fact] + public void TestGroupingWithTKeyOfWrongType() + { + const int lenght = 5; + + var message = string.Empty; + + //Create test dataframe (numbers starting from 0 up to lenght) + DataFrame df = MakeTestDataFrame(lenght); + + //Use wrong type for grouping + Assert.Throws(() => df.GroupBy("Tens")); + } + + + private DataFrame MakeTestDataFrame(int length) + { + DataFrame df = DataFrameTests.MakeDataFrameWithNumericColumns(length, false); + DataFrameColumn parityColumn = new StringDataFrameColumn("Parity", Enumerable.Range(0, length).Select(x => x % 2 == 0 ? "even" : "odd")); + DataFrameColumn tensColumn = new Int32DataFrameColumn("Tens", Enumerable.Range(0, length).Select(x => x / 10)); + df.Columns.Insert(df.Columns.Count, parityColumn); + df.Columns.Insert(df.Columns.Count, tensColumn); + + return df; + } + } +} From f7658b2d65576f07853da870d03c121cbb099471 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Sat, 29 May 2021 12:58:21 +0300 Subject: [PATCH 3/6] #5820 fix code review findings --- src/Microsoft.Data.Analysis/GroupBy.cs | 33 +++++++++++++++-- src/Microsoft.Data.Analysis/Grouping.cs | 32 ---------------- .../DataFrameGroupByTests.cs | 37 ++++++++++++------- 3 files changed, 53 insertions(+), 49 deletions(-) delete mode 100644 src/Microsoft.Data.Analysis/Grouping.cs diff --git a/src/Microsoft.Data.Analysis/GroupBy.cs b/src/Microsoft.Data.Analysis/GroupBy.cs index acbdd3cbd7..4defc4ec3a 100644 --- a/src/Microsoft.Data.Analysis/GroupBy.cs +++ b/src/Microsoft.Data.Analysis/GroupBy.cs @@ -72,8 +72,35 @@ public abstract class GroupBy public abstract DataFrame Mean(params string[] columnNames); } - public class GroupBy : GroupBy + public class GroupBy : GroupBy { + #region Internal class that implements IGrouping LINQ interface + internal class Grouping : IGrouping + { + private readonly TKey _key; + private readonly IEnumerable _rows; + + public Grouping(TKey key, IEnumerable rows) + { + _key = key; + _rows = rows; + } + + public TKey Key => _key; + + public IEnumerator GetEnumerator() + { + return _rows.GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return _rows.GetEnumerator(); + } + } + + #endregion + private int _groupByColumnIndex; private IDictionary> _keyToRowIndicesMap; private DataFrame _dataFrame; @@ -468,9 +495,9 @@ public override DataFrame Mean(params string[] columnNames) public IEnumerable> Groupings { - get + get { - return _keyToRowIndicesMap.Select(kvp => new Grouping(kvp.Key, kvp.Value.Select(index => _dataFrame.Rows[index]).ToArray())); + return _keyToRowIndicesMap.Select(kvp => new Grouping(kvp.Key, kvp.Value.Select(index => _dataFrame.Rows[index]))); } } } diff --git a/src/Microsoft.Data.Analysis/Grouping.cs b/src/Microsoft.Data.Analysis/Grouping.cs deleted file mode 100644 index fcbd037568..0000000000 --- a/src/Microsoft.Data.Analysis/Grouping.cs +++ /dev/null @@ -1,32 +0,0 @@ -using System; -using System.Collections; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Microsoft.Data.Analysis -{ - public class Grouping : IGrouping - { - private readonly TKey _key; - private readonly ICollection _rows; - - public Grouping(TKey key, ICollection rows) - { - _key = key; - _rows = rows; - } - - public TKey Key => _key; - - public IEnumerator GetEnumerator() - { - return _rows.GetEnumerator(); - } - - IEnumerator IEnumerable.GetEnumerator() - { - return _rows.GetEnumerator(); - } - } -} diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs index 91325338bb..57bac29d6e 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs @@ -1,4 +1,8 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; using System.Collections.Generic; using System.Linq; using System.Text; @@ -11,10 +15,10 @@ public class DataFrameGroupByTests [Fact] public void TestGroupingWithTKeyTypeofString() { - int lenght = 11; + const int lenght = 11; //Create test dataframe (numbers starting from 0 up to lenght) - DataFrame df = MakeTestDataFrame(lenght); + DataFrame df = MakeTestDataFrameWithParityAndTensColumns(lenght); var grouping = df.GroupBy("Parity").Groupings; @@ -30,26 +34,32 @@ public void TestGroupingWithTKeyTypeofString() Assert.NotNull(evenGroup); Assert.Equal(lenght / 2 + lenght % 2, evenGroup.Count()); + + } + + [Fact] + public void TestGroupingWithTKey_CornerCases() + { //Check corner cases - lenght = 0; - df = MakeTestDataFrame(lenght); - grouping = df.GroupBy("Parity").Groupings; + var df = MakeTestDataFrameWithParityAndTensColumns(0); + var grouping = df.GroupBy("Parity").Groupings; Assert.Empty(grouping); - lenght = 1; - df = MakeTestDataFrame(lenght); + + df = MakeTestDataFrameWithParityAndTensColumns(1); grouping = df.GroupBy("Parity").Groupings; Assert.Single(grouping); Assert.Equal("even", grouping.First().Key); } - + + [Fact] public void TestGroupingWithTKeyPrimitiveType() { const int lenght = 55; //Create test dataframe (numbers starting from 0 up to lenght) - DataFrame df = MakeTestDataFrame(lenght); + DataFrame df = MakeTestDataFrameWithParityAndTensColumns(lenght); //Group elements by int column, that contain the amount of full tens in each int var groupings = df.GroupBy("Tens").Groupings.ToDictionary(g => g.Key, g => g.ToList()); @@ -80,20 +90,19 @@ public void TestGroupingWithTKeyPrimitiveType() [Fact] public void TestGroupingWithTKeyOfWrongType() - { - const int lenght = 5; + { var message = string.Empty; //Create test dataframe (numbers starting from 0 up to lenght) - DataFrame df = MakeTestDataFrame(lenght); + DataFrame df = MakeTestDataFrameWithParityAndTensColumns(1); //Use wrong type for grouping Assert.Throws(() => df.GroupBy("Tens")); } - private DataFrame MakeTestDataFrame(int length) + private DataFrame MakeTestDataFrameWithParityAndTensColumns(int length) { DataFrame df = DataFrameTests.MakeDataFrameWithNumericColumns(length, false); DataFrameColumn parityColumn = new StringDataFrameColumn("Parity", Enumerable.Range(0, length).Select(x => x % 2 == 0 ? "even" : "odd")); From 2c170dcff43bf54303dab644b86ee4fc50d80fe4 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Wed, 2 Jun 2021 23:15:57 +0300 Subject: [PATCH 4/6] #5820 fix code review findings --- src/Microsoft.Data.Analysis/GroupBy.cs | 5 ++++- .../DataFrameGroupByTests.cs | 16 ++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.Data.Analysis/GroupBy.cs b/src/Microsoft.Data.Analysis/GroupBy.cs index 4defc4ec3a..55f57602ad 100644 --- a/src/Microsoft.Data.Analysis/GroupBy.cs +++ b/src/Microsoft.Data.Analysis/GroupBy.cs @@ -75,7 +75,7 @@ public abstract class GroupBy public class GroupBy : GroupBy { #region Internal class that implements IGrouping LINQ interface - internal class Grouping : IGrouping + private class Grouping : IGrouping { private readonly TKey _key; private readonly IEnumerable _rows; @@ -493,6 +493,9 @@ public override DataFrame Mean(params string[] columnNames) return ret; } + /// + /// Returns the collection of Grouping objects, where each object represent as set fo DataFrameRows having the same Key + /// public IEnumerable> Groupings { get diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs index 57bac29d6e..fdbc859f7b 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameGroupByTests.cs @@ -15,10 +15,10 @@ public class DataFrameGroupByTests [Fact] public void TestGroupingWithTKeyTypeofString() { - const int lenght = 11; + const int length = 11; //Create test dataframe (numbers starting from 0 up to lenght) - DataFrame df = MakeTestDataFrameWithParityAndTensColumns(lenght); + DataFrame df = MakeTestDataFrameWithParityAndTensColumns(length); var grouping = df.GroupBy("Parity").Groupings; @@ -28,11 +28,11 @@ public void TestGroupingWithTKeyTypeofString() //Check number of elements in each group var oddGroup = grouping.Where(gr => gr.Key == "odd").FirstOrDefault(); Assert.NotNull(oddGroup); - Assert.Equal(lenght/2, oddGroup.Count()); + Assert.Equal(length/2, oddGroup.Count()); var evenGroup = grouping.Where(gr => gr.Key == "even").FirstOrDefault(); Assert.NotNull(evenGroup); - Assert.Equal(lenght / 2 + lenght % 2, evenGroup.Count()); + Assert.Equal(length / 2 + length % 2, evenGroup.Count()); } @@ -56,10 +56,10 @@ public void TestGroupingWithTKey_CornerCases() [Fact] public void TestGroupingWithTKeyPrimitiveType() { - const int lenght = 55; + const int length = 55; //Create test dataframe (numbers starting from 0 up to lenght) - DataFrame df = MakeTestDataFrameWithParityAndTensColumns(lenght); + DataFrame df = MakeTestDataFrameWithParityAndTensColumns(length); //Group elements by int column, that contain the amount of full tens in each int var groupings = df.GroupBy("Tens").Groupings.ToDictionary(g => g.Key, g => g.ToList()); @@ -68,7 +68,7 @@ public void TestGroupingWithTKeyPrimitiveType() int numberColumnsCount = df.Columns.Count - 2; //except "Parity" and "Tens" columns //Check each group - for (int i = 0; i < lenght / 10; i++) + for (int i = 0; i < length / 10; i++) { Assert.Equal(10, groupings[i].Count()); @@ -85,7 +85,7 @@ public void TestGroupingWithTKeyPrimitiveType() } //Last group should contain smaller amount of items - Assert.Equal(lenght % 10, groupings[lenght / 10].Count()); + Assert.Equal(length % 10, groupings[length / 10].Count()); } [Fact] From 4afefd0b5bc2f47658e125d8a3cd385f371e69a8 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Wed, 2 Jun 2021 13:49:46 -0700 Subject: [PATCH 5/6] Update src/Microsoft.Data.Analysis/GroupBy.cs --- src/Microsoft.Data.Analysis/GroupBy.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.Data.Analysis/GroupBy.cs b/src/Microsoft.Data.Analysis/GroupBy.cs index 55f57602ad..64642272d5 100644 --- a/src/Microsoft.Data.Analysis/GroupBy.cs +++ b/src/Microsoft.Data.Analysis/GroupBy.cs @@ -494,7 +494,7 @@ public override DataFrame Mean(params string[] columnNames) } /// - /// Returns the collection of Grouping objects, where each object represent as set fo DataFrameRows having the same Key + /// Returns a collection of Grouping objects, where each object represent a set of DataFrameRows having the same Key /// public IEnumerable> Groupings { From c4d3ad21ecb145beb095a122253b2baa4a4d4266 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Thu, 3 Jun 2021 10:10:57 -0700 Subject: [PATCH 6/6] Fix remaining comments --- src/Microsoft.Data.Analysis/DataFrame.cs | 13 +++++-------- src/Microsoft.Data.Analysis/Strings.Designer.cs | 11 ++++++++++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.cs b/src/Microsoft.Data.Analysis/DataFrame.cs index ea3ded0f83..1bfb4a4784 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.cs @@ -376,16 +376,13 @@ public GroupBy GroupBy(string columnName) /// A GroupBy object that stores the group information. public GroupBy GroupBy(string columnName) { - int columnIndex = _columnCollection.IndexOf(columnName); - if (columnIndex == -1) - throw new ArgumentException(String.Format(Strings.InvalidColumnName, columnName), nameof(columnName)); - - DataFrameColumn column = _columnCollection[columnIndex]; - - var group = column.GroupBy(columnIndex, this) as GroupBy; + GroupBy group = GroupBy(columnName) as GroupBy; if (group == null) - throw new InvalidCastException(String.Format(Strings.BadColumnCast, columnName, column.DataType, typeof(TKey))); + { + DataFrameColumn column = this[columnName]; + throw new InvalidCastException(String.Format(Strings.BadColumnCastDuringGrouping, columnName, column.DataType, typeof(TKey))); + } return group; } diff --git a/src/Microsoft.Data.Analysis/Strings.Designer.cs b/src/Microsoft.Data.Analysis/Strings.Designer.cs index ff3cd6cadd..4b24665bf3 100644 --- a/src/Microsoft.Data.Analysis/Strings.Designer.cs +++ b/src/Microsoft.Data.Analysis/Strings.Designer.cs @@ -69,6 +69,15 @@ internal static string BadColumnCast { } } + /// + /// Looks up a localized string similar to Cannot cast elements of column '{0}' type of {1} to type {2} used as TKey in grouping . + /// + internal static string BadColumnCastDuringGrouping { + get { + return ResourceManager.GetString("BadColumnCastDuringGrouping", resourceCulture); + } + } + /// /// Looks up a localized string similar to Line {0} cannot be parsed with the current Delimiters.. /// @@ -365,7 +374,7 @@ internal static string NotSupportedColumnType { return ResourceManager.GetString("NotSupportedColumnType", resourceCulture); } } - + /// /// Looks up a localized string similar to Delimiters is null.. ///