diff --git a/ClashBehaviour.cs b/ClashBehaviour.cs
index 743e4a0..da745c5 100644
--- a/ClashBehaviour.cs
+++ b/ClashBehaviour.cs
@@ -1,8 +1,22 @@
namespace Dimension.DataFrame.Extensions;
+///
+/// Defines the behavior when adding a column to a DataFrame and a column with the same name already exists
+///
public enum ClashBehaviour
{
+ ///
+ /// Keep the existing column and do not add the new column
+ ///
KeepOriginal,
+
+ ///
+ /// Remove the existing column and add the new column in its place
+ ///
ReplaceOriginal,
+
+ ///
+ /// Throw an InvalidOperationException when a name clash occurs (default behavior)
+ ///
Exception
}
\ No newline at end of file
diff --git a/DataFrameExtensions.cs b/DataFrameExtensions.cs
index 429477f..7ee665f 100644
--- a/DataFrameExtensions.cs
+++ b/DataFrameExtensions.cs
@@ -47,7 +47,7 @@ public static class DataFrameExtensionsCalculations
public static PrimitiveDataFrameColumn Apply(this PrimitiveDataFrameColumn column, Func operation, string name = "")
where T : unmanaged, INumber
{
- if (operation == null)
+ if (operation is null)
{
throw new ArgumentNullException(nameof(operation));
}
diff --git a/DataFrameExtensionsArithmetic.cs b/DataFrameExtensionsArithmetic.cs
index 8811c42..3f5d568 100644
--- a/DataFrameExtensionsArithmetic.cs
+++ b/DataFrameExtensionsArithmetic.cs
@@ -63,7 +63,7 @@ public static PrimitiveDataFrameColumn Minus(this PrimitiveDataFrameColumn
if (string.IsNullOrEmpty(name))
{
- name = $"{column.Name}_Minus_{columnToSubtract.Name}";
+ name = $"{column.Name}-{columnToSubtract.Name}";
}
return new PrimitiveDataFrameColumn(name, result);
@@ -99,8 +99,8 @@ public static PrimitiveDataFrameColumn Times(this PrimitiveDataFrameColumn
if (string.IsNullOrEmpty(name))
{
- var otherNames = otherColumns.Select(c => c.Name);
- name = $"{column.Name}_Times_{string.Join("_", otherNames)}";
+ var namesToConcat = new[] {column.Name}.Concat(otherColumns.Select(c => c.Name));
+ name = string.Join("*", namesToConcat);
}
return new PrimitiveDataFrameColumn(name, result);
diff --git a/DataFrameExtensionsFilters.cs b/DataFrameExtensionsFilters.cs
index 84a500f..d33d19f 100644
--- a/DataFrameExtensionsFilters.cs
+++ b/DataFrameExtensionsFilters.cs
@@ -49,75 +49,7 @@ public static Microsoft.Data.Analysis.DataFrame Filter(this Microsoft.Data.Analy
var newColumns = new List();
foreach (var column in df.Columns)
{
- DataFrameColumn newColumn;
-
- // Support common numeric types
- if (column.DataType == typeof(int))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(long))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(float))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(double))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(decimal))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- // Support other common types
- else if (column.DataType == typeof(bool))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(byte))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(sbyte))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(short))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(ushort))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(uint))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(ulong))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(char))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(DateTime))
- {
- newColumn = new PrimitiveDataFrameColumn(column.Name);
- }
- else if (column.DataType == typeof(string))
- {
- newColumn = new StringDataFrameColumn(column.Name);
- }
- else
- {
- throw new NotSupportedException($"Column type {column.DataType.Name} is not supported. Supported types: int, long, float, double, decimal, bool, byte, sbyte, short, ushort, uint, ulong, char, DateTime, string");
- }
-
+ var newColumn = CreateColumnByType(column.DataType, column.Name);
newColumns.Add(newColumn);
}
@@ -137,4 +69,35 @@ public static Microsoft.Data.Analysis.DataFrame Filter(this Microsoft.Data.Analy
return newDf;
}
+
+ ///
+ /// Creates a new DataFrame column based on the specified type
+ ///
+ /// The type of data the column will hold
+ /// The name for the new column
+ /// A new DataFrameColumn of the appropriate type
+ /// Thrown when the data type is not supported
+ private static DataFrameColumn CreateColumnByType(Type dataType, string columnName)
+ {
+ // Use pattern matching for cleaner type checking
+ if (dataType == typeof(int)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(long)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(float)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(double)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(decimal)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(bool)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(byte)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(sbyte)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(short)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(ushort)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(uint)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(ulong)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(char)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(DateTime)) return new PrimitiveDataFrameColumn(columnName);
+ if (dataType == typeof(string)) return new StringDataFrameColumn(columnName);
+
+ throw new NotSupportedException(
+ $"Column type {dataType.Name} is not supported. " +
+ "Supported types: int, long, float, double, decimal, bool, byte, sbyte, short, ushort, uint, ulong, char, DateTime, string");
+ }
}
\ No newline at end of file
diff --git a/DataFrameExtensionsNullsNaNs.cs b/DataFrameExtensionsNullsNaNs.cs
index 85ea02d..903949c 100644
--- a/DataFrameExtensionsNullsNaNs.cs
+++ b/DataFrameExtensionsNullsNaNs.cs
@@ -13,17 +13,17 @@ public static class DataFrameExtensionsNullsNaNs
public static PrimitiveDataFrameColumn DropNulls(this PrimitiveDataFrameColumn column)
where T : unmanaged, INumber
{
- var newColumn = new PrimitiveDataFrameColumn(column.Name, column.Length);
+ var validValues = new List();
foreach (var value in column)
{
var shouldAddValue = value != null && !(value is float f && float.IsNaN(f)) && !(value is double d && double.IsNaN(d));
if (shouldAddValue)
{
- newColumn.Append(value);
+ validValues.Add(value);
}
}
- return newColumn;
+ return new PrimitiveDataFrameColumn(column.Name, validValues);
}
public static Microsoft.Data.Analysis.DataFrame DropNulls(this Microsoft.Data.Analysis.DataFrame df)
diff --git a/DataFrameExtensionsRolling.cs b/DataFrameExtensionsRolling.cs
index cde5484..04bd63f 100644
--- a/DataFrameExtensionsRolling.cs
+++ b/DataFrameExtensionsRolling.cs
@@ -24,6 +24,10 @@ public static PrimitiveDataFrameColumn Rolling(this PrimitiveDataFrameColu
where T : unmanaged, INumber
{
var result = new PrimitiveDataFrameColumn(column.Name + "_Rolling", column.Length);
+
+ // Pre-allocate a reusable buffer to avoid repeated allocations
+ var windowBuffer = new T?[windowSize];
+
for (var i = 0; i < column.Length; i++)
{
if (i < windowSize - 1)
@@ -32,19 +36,20 @@ public static PrimitiveDataFrameColumn Rolling(this PrimitiveDataFrameColu
continue;
}
- var window = new List();
+ // Reuse the buffer instead of creating new List
+ var windowCount = 0;
for (var j = i - windowSize + 1; j <= i; j++)
{
- if (!column[j].HasValue)
+ if (column[j].HasValue)
{
- continue;
+ windowBuffer[windowCount++] = column[j];
}
-
- window.Add(column[j]);
}
- if (window.Count > 0)
+ if (windowCount > 0)
{
+ // Create a span/array view of only the valid values
+ var window = new ArraySegment(windowBuffer, 0, windowCount);
var opResult = operation(window);
result[i] = opResult;
}
diff --git a/DataFrameExtensionsStatistics.cs b/DataFrameExtensionsStatistics.cs
index ee4b999..89f366f 100644
--- a/DataFrameExtensionsStatistics.cs
+++ b/DataFrameExtensionsStatistics.cs
@@ -50,8 +50,8 @@ public static class DataFrameExtensionsStatistics
///
/// Numeric type
/// Column to calculate median for
- /// Median value, or null if column is empty or all values are null
- public static T? Median(this PrimitiveDataFrameColumn column)
+ /// Median value as double, or null if column is empty or all values are null
+ public static double? Median(this PrimitiveDataFrameColumn column)
where T : unmanaged, INumber
{
if (column == null || column.Length == 0)
@@ -59,7 +59,7 @@ public static class DataFrameExtensionsStatistics
return null;
}
- var values = column.Where(v => v.HasValue).Select(v => v!.Value).OrderBy(v => v).ToList();
+ var values = column.Where(v => v.HasValue).Select(v => Convert.ToDouble(v!.Value)).OrderBy(v => v).ToList();
if (values.Count == 0)
{
@@ -71,7 +71,7 @@ public static class DataFrameExtensionsStatistics
if (values.Count % 2 == 0)
{
// Even number of elements - average the two middle values
- return (values[middleIndex - 1] + values[middleIndex]) / T.CreateChecked(2);
+ return (values[middleIndex - 1] + values[middleIndex]) / 2.0;
}
else
{
@@ -95,7 +95,7 @@ public static class DataFrameExtensionsStatistics
}
///
- /// Calculates the variance of a column
+ /// Calculates the variance of a column using Welford's online algorithm for numerical stability
///
/// Numeric type
/// Column to calculate variance for
@@ -109,18 +109,32 @@ public static class DataFrameExtensionsStatistics
return null;
}
- var values = column.Where(v => v.HasValue).Select(v => Convert.ToDouble(v!.Value)).ToList();
+ // Single-pass variance calculation using Welford's algorithm
+ var count = 0;
+ var mean = 0.0;
+ var m2 = 0.0;
- if (values.Count < (sample ? 2 : 1))
+ for (var i = 0; i < column.Length; i++)
{
- return null;
+ var value = column[i];
+ if (value.HasValue)
+ {
+ count++;
+ var doubleValue = Convert.ToDouble(value.Value);
+ var delta = doubleValue - mean;
+ mean += delta / count;
+ var delta2 = doubleValue - mean;
+ m2 += delta * delta2;
+ }
}
- var mean = values.Average();
- var sumOfSquaredDifferences = values.Sum(v => Math.Pow(v - mean, 2));
- var divisor = sample ? values.Count - 1 : values.Count;
+ if (count < (sample ? 2 : 1))
+ {
+ return null;
+ }
- return sumOfSquaredDifferences / divisor;
+ var divisor = sample ? count - 1 : count;
+ return m2 / divisor;
}
///
@@ -212,7 +226,7 @@ public static long Count(this PrimitiveDataFrameColumn column)
/// Numeric type
/// Column to calculate statistics for
/// Tuple containing (count, mean, stddev, min, 25th percentile, median, 75th percentile, max)
- public static (long Count, T? Mean, double? StdDev, T? Min, T? Q25, T? Median, T? Q75, T? Max) Describe(this PrimitiveDataFrameColumn column)
+ public static (long Count, T? Mean, double? StdDev, T? Min, double? Q25, double? Median, double? Q75, T? Max) Describe(this PrimitiveDataFrameColumn column)
where T : unmanaged, INumber
{
var count = column.Count();
@@ -233,8 +247,8 @@ public static (long Count, T? Mean, double? StdDev, T? Min, T? Q25, T? Median, T
/// Numeric type
/// Column to calculate quantile for
/// Quantile to calculate (0.0 to 1.0, e.g., 0.25 for 25th percentile)
- /// Quantile value, or null if column is empty
- public static T? Quantile(this PrimitiveDataFrameColumn column, double quantile)
+ /// Quantile value as double, or null if column is empty
+ public static double? Quantile(this PrimitiveDataFrameColumn column, double quantile)
where T : unmanaged, INumber
{
if (column == null || column.Length == 0 || quantile < 0 || quantile > 1)
@@ -242,7 +256,7 @@ public static (long Count, T? Mean, double? StdDev, T? Min, T? Q25, T? Median, T
return null;
}
- var values = column.Where(v => v.HasValue).Select(v => v!.Value).OrderBy(v => v).ToList();
+ var values = column.Where(v => v.HasValue).Select(v => Convert.ToDouble(v!.Value)).OrderBy(v => v).ToList();
if (values.Count == 0)
{
@@ -258,7 +272,7 @@ public static (long Count, T? Mean, double? StdDev, T? Min, T? Q25, T? Median, T
return values[lowerIndex];
}
- var weight = T.CreateChecked(index - lowerIndex);
+ var weight = index - lowerIndex;
return values[lowerIndex] + weight * (values[upperIndex] - values[lowerIndex]);
}
}
diff --git a/DataFrameExtensionsSugar.cs b/DataFrameExtensionsSugar.cs
index ee4d21e..ee01664 100644
--- a/DataFrameExtensionsSugar.cs
+++ b/DataFrameExtensionsSugar.cs
@@ -78,89 +78,4 @@ public static PrimitiveDataFrameColumn AddTo(this PrimitiveDataFrameColumn
df.Columns.Add(column);
return column;
}
-
- private static bool ValuesAreEqual(T? a, T? b, T relativeTolerance)
- where T : struct, INumber
- {
- if (!a.HasValue && !b.HasValue)
- {
- return true; // Both are null/missing
- }
-
- if (!a.HasValue || !b.HasValue)
- {
- return false; // One is null/missing, the other isn't
- }
-
- // Special handling for NaN values for floating-point types
- if (typeof(T) == typeof(float))
- {
- // Explicitly handle float NaN comparisons
- if (float.IsNaN((float) (object) a) && float.IsNaN((float) (object) b))
- {
- return true;
- }
- }
- else if (typeof(T) == typeof(double))
- {
- // Explicitly handle double NaN comparisons
- if (double.IsNaN((double) (object) a) && double.IsNaN((double) (object) b))
- {
- return true;
- }
- }
-
- // Calculate the absolute difference
- var absoluteDifference = a.Value - b.Value;
- if (absoluteDifference == T.Zero)
- {
- return true;
- }
-
- if (absoluteDifference < T.Zero)
- {
- absoluteDifference *= -T.One;
- }
-
- // Calculate the absolute maximum of the two numbers
- var maxAbsolute = a.Value > b.Value ? a.Value : b.Value;
- if (maxAbsolute == T.Zero)
- {
- // avoid DBZ error
- return a.Value == b.Value;
- }
-
- // Calculate the relative difference based on the maximum absolute value
- var relativeDifference = absoluteDifference / maxAbsolute;
-
- // Check if the relative difference is within the relative tolerance
- return relativeDifference <= relativeTolerance;
- }
-
- private static T GetTolerance() where T : struct, INumber
- {
- // Define tolerance based on type
- if (typeof(T) == typeof(float))
- {
- return (T) (object) (float) 1e-6f; // Example tolerance for float
- }
- else if (typeof(T) == typeof(double))
- {
- return (T) (object) (double) 1e-15; // Example tolerance for double
- }
- else if (typeof(T) == typeof(decimal))
- {
- return (T) (object) (decimal) 1e-28M; // Lower tolerance for decimal
- }
- else if (typeof(T) == typeof(int) || typeof(T) == typeof(long))
- {
- // For integral types, exact match is expected, so tolerance is zero
- return T.Zero;
- }
- else
- {
- // Default tolerance for other types, adjust as necessary
- return T.One / T.CreateChecked(1000000);
- }
- }
}
\ No newline at end of file
diff --git a/Dimension.DataFrame.Extensions.Tests/DataFrameExtensionsIOTests.cs b/Dimension.DataFrame.Extensions.Tests/DataFrameExtensionsIOTests.cs
new file mode 100644
index 0000000..58fe35b
--- /dev/null
+++ b/Dimension.DataFrame.Extensions.Tests/DataFrameExtensionsIOTests.cs
@@ -0,0 +1,274 @@
+using FluentAssertions;
+using Microsoft.Data.Analysis;
+using System;
+using System.IO;
+using Xunit;
+
+namespace Dimension.DataFrame.Extensions.Tests;
+
+public class DataFrameExtensionsIOTests
+{
+ [Fact]
+ public void SaveToCsv_BasicDataFrame_CreatesValidCsv()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new[] { 1, 2, 3 }),
+ new StringDataFrameColumn("Name", new[] { "Alice", "Bob", "Charlie" }),
+ new PrimitiveDataFrameColumn("Score", new[] { 95.5, 87.3, 92.1 })
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile);
+
+ // Assert
+ var content = File.ReadAllText(tempFile);
+ content.Should().Contain("ID,Name,Score");
+ content.Should().Contain("1,Alice,95.5");
+ content.Should().Contain("2,Bob,87.3");
+ content.Should().Contain("3,Charlie,92.1");
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+
+ [Fact]
+ public void SaveToCsv_WithCustomSeparator_UsesCorrectSeparator()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("A", new[] { 1, 2 }),
+ new PrimitiveDataFrameColumn("B", new[] { 3, 4 })
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile, sep: ";");
+
+ // Assert
+ var content = File.ReadAllText(tempFile);
+ content.Should().Contain("A;B");
+ content.Should().Contain("1;3");
+ content.Should().Contain("2;4");
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+
+ [Fact]
+ public void SaveToCsv_WithoutHeader_DoesNotIncludeColumnNames()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new[] { 1, 2 })
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile, includeHeader: false);
+
+ // Assert
+ var content = File.ReadAllText(tempFile);
+ content.Should().NotContain("ID");
+ content.Should().Contain("1");
+ content.Should().Contain("2");
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+
+ [Fact]
+ public void SaveToCsv_WithQuotesInData_EscapesCorrectly()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new StringDataFrameColumn("Text", new[] { "Hello \"World\"", "Simple text" })
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile);
+
+ // Assert
+ var content = File.ReadAllText(tempFile);
+ content.Should().Contain("\"Hello \"\"World\"\"\""); // RFC 4180: quotes doubled
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+
+ [Fact]
+ public void SaveToCsv_WithCommaInData_QuotesField()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new StringDataFrameColumn("Text", new[] { "Hello, World", "Simple" })
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile);
+
+ // Assert
+ var content = File.ReadAllText(tempFile);
+ content.Should().Contain("\"Hello, World\""); // RFC 4180: field with comma must be quoted
+ content.Should().Contain("Simple"); // Simple text not quoted
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+
+ [Fact]
+ public void SaveToCsv_WithNewlineInData_QuotesField()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new StringDataFrameColumn("Text", new[] { "Line1\nLine2", "Simple" })
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile);
+
+ // Assert
+ var content = File.ReadAllText(tempFile);
+ content.Should().Contain("\"Line1\nLine2\""); // RFC 4180: field with newline must be quoted
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+
+ [Fact]
+ public void SaveToCsv_WithFormulaInjectionAttempt_SanitizesData()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new StringDataFrameColumn("Text", new[] { "=SUM(A1:A10)", "+cmd", "-cmd", "@cmd", "\tcmd", "\rcmd" })
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile);
+
+ // Assert
+ var content = File.ReadAllText(tempFile);
+ // CSV injection prevention: formula characters should be prefixed with single quote
+ content.Should().Contain("'=SUM");
+ content.Should().Contain("'+cmd");
+ content.Should().Contain("'-cmd");
+ content.Should().Contain("'@cmd");
+ content.Should().Contain("'\tcmd");
+ content.Should().Contain("'\rcmd");
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+
+ [Fact]
+ public void SaveToCsv_WithNullValues_HandlesCorrectly()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("Num", new int?[] { 1, null, 3 }),
+ new StringDataFrameColumn("Text", new[] { "A", null, "C" })
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile);
+
+ // Assert
+ File.Exists(tempFile).Should().BeTrue();
+ var content = File.ReadAllText(tempFile);
+ content.Should().Contain("Num,Text");
+ // Nulls should be represented as empty strings
+ var lines = content.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries);
+ lines.Should().HaveCountGreaterOrEqualTo(3); // Header + 3 data rows
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+
+ [Fact]
+ public void SaveToCsv_ToInvalidPath_ThrowsIOException()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("A", new[] { 1 })
+ );
+ var invalidPath = "/invalid/path/that/does/not/exist/file.csv";
+
+ // Act & Assert
+ var act = () => df.SaveToCsv(invalidPath);
+ act.Should().Throw();
+ }
+
+ [Fact]
+ public void SaveToCsv_EmptyDataFrame_CreatesFileWithHeaderOnly()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID"),
+ new StringDataFrameColumn("Name")
+ );
+ var tempFile = Path.GetTempFileName();
+
+ try
+ {
+ // Act
+ df.SaveToCsv(tempFile);
+
+ // Assert
+ var content = File.ReadAllText(tempFile);
+ content.Should().Contain("ID,Name");
+ var lines = content.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries);
+ lines.Should().HaveCount(1); // Only header line
+ }
+ finally
+ {
+ if (File.Exists(tempFile))
+ File.Delete(tempFile);
+ }
+ }
+}
diff --git a/Dimension.DataFrame.Extensions.Tests/DataFrameExtensionsRowsTests.cs b/Dimension.DataFrame.Extensions.Tests/DataFrameExtensionsRowsTests.cs
new file mode 100644
index 0000000..10fcc3e
--- /dev/null
+++ b/Dimension.DataFrame.Extensions.Tests/DataFrameExtensionsRowsTests.cs
@@ -0,0 +1,236 @@
+using FluentAssertions;
+using Microsoft.Data.Analysis;
+using System;
+using Xunit;
+
+namespace Dimension.DataFrame.Extensions.Tests;
+
+public class DataFrameExtensionsRowsTests
+{
+ [Fact]
+ public void AddRow_WithMatchingTypes_AddsRowSuccessfully()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new[] { 1, 2 }),
+ new StringDataFrameColumn("Name", new[] { "Alice", "Bob" })
+ );
+
+ // Act
+ df.AddRow(3, "Charlie");
+
+ // Assert
+ df.Rows.Count.Should().Be(3);
+ df["ID"][2].Should().Be(3);
+ df["Name"][2].Should().Be("Charlie");
+ }
+
+ [Fact]
+ public void AddRow_WithNullableInt_HandlesNullCorrectly()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new int?[] { 1, 2 }),
+ new StringDataFrameColumn("Name", new[] { "Alice", "Bob" })
+ );
+
+ // Act
+ df.AddRow(null, "Charlie");
+
+ // Assert
+ df.Rows.Count.Should().Be(3);
+ df["ID"][2].Should().BeNull();
+ df["Name"][2].Should().Be("Charlie");
+ }
+
+ [Fact]
+ public void AddRow_WithMultipleNumericTypes_AddsCorrectly()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("IntCol", new[] { 1 }),
+ new PrimitiveDataFrameColumn("LongCol", new[] { 100L }),
+ new PrimitiveDataFrameColumn("FloatCol", new[] { 1.5f }),
+ new PrimitiveDataFrameColumn("DoubleCol", new[] { 2.5 }),
+ new PrimitiveDataFrameColumn("DecimalCol", new[] { 3.5m })
+ );
+
+ // Act
+ df.AddRow(2, 200L, 2.5f, 3.5, 4.5m);
+
+ // Assert
+ df.Rows.Count.Should().Be(2);
+ df["IntCol"][1].Should().Be(2);
+ df["LongCol"][1].Should().Be(200L);
+ df["FloatCol"][1].Should().Be(2.5f);
+ df["DoubleCol"][1].Should().Be(3.5);
+ df["DecimalCol"][1].Should().Be(4.5m);
+ }
+
+ [Fact]
+ public void AddRow_WithBooleanColumn_AddsCorrectly()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new[] { 1 }),
+ new PrimitiveDataFrameColumn("Active", new[] { true })
+ );
+
+ // Act
+ df.AddRow(2, false);
+
+ // Assert
+ df.Rows.Count.Should().Be(2);
+ df["Active"][1].Should().Be(false);
+ }
+
+ [Fact]
+ public void AddRow_WithDateTimeColumn_AddsCorrectly()
+ {
+ // Arrange
+ var date1 = new DateTime(2024, 1, 1);
+ var date2 = new DateTime(2024, 1, 2);
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new[] { 1 }),
+ new PrimitiveDataFrameColumn("Date", new[] { date1 })
+ );
+
+ // Act
+ df.AddRow(2, date2);
+
+ // Assert
+ df.Rows.Count.Should().Be(2);
+ df["Date"][1].Should().Be(date2);
+ }
+
+ [Fact]
+ public void AddRow_WithWrongNumberOfValues_ThrowsArgumentException()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new[] { 1 }),
+ new StringDataFrameColumn("Name", new[] { "Alice" })
+ );
+
+ // Act & Assert
+ var act = () => df.AddRow(2); // Missing Name value
+ act.Should().Throw()
+ .WithMessage("*number of provided values must match*");
+ }
+
+ [Fact]
+ public void AddRow_WithIncompatibleType_ThrowsInvalidOperationException()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new[] { 1 }),
+ new StringDataFrameColumn("Name", new[] { "Alice" })
+ );
+
+ // Act & Assert
+ var act = () => df.AddRow("NotAnInt", "Bob"); // String instead of int
+ act.Should().Throw()
+ .WithMessage("*not compatible*");
+ }
+
+ [Fact]
+ public void AddRow_WithIEnumerable_AddsRowSuccessfully()
+ {
+ // Arrange
+ var df = new Microsoft.Data.Analysis.DataFrame(
+ new PrimitiveDataFrameColumn("ID", new[] { 1, 2 }),
+ new StringDataFrameColumn("Name", new[] { "Alice", "Bob" })
+ );
+ var values = new object[] { 3, "Charlie" };
+
+ // Act
+ df.AddRow((System.Collections.Generic.IEnumerable