Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e8720a0
Give message with line and column of CSV file if data conversion fails
derekdiamond May 11, 2021
fa54742
Delete query.json
derekdiamond May 14, 2021
7c1c28c
Delete CMakeCCompiler.cmake
derekdiamond May 14, 2021
14a1724
Delete CMakeCXXCompiler.cmake
derekdiamond May 14, 2021
8cdafb8
Delete CMakeRCCompiler.cmake
derekdiamond May 14, 2021
36015cd
Delete VSInheritEnvironments.txt
derekdiamond May 14, 2021
bdb1733
Delete CMakeSystem.cmake
derekdiamond May 14, 2021
9182d44
Delete main.c
derekdiamond May 14, 2021
060b0ee
Delete cmake.check_cache
derekdiamond May 14, 2021
b75bb72
Delete foo.h
derekdiamond May 14, 2021
8a5df17
Delete CMakeCXXCompilerId.cpp
derekdiamond May 14, 2021
40be5c8
Delete CMakeCCompilerId.c
derekdiamond May 14, 2021
cc8227a
Delete CMakeCache.txt
derekdiamond May 14, 2021
98d7ea2
Allow parsing of DateTime data in CSV import
derekdiamond May 16, 2021
09319de
Merge branch 'main' of https://github.com/primethought/machinelearnin…
derekdiamond May 16, 2021
caf0af7
Add max and min calculations to DateTime columns
derekdiamond May 16, 2021
ebf42d5
Throw FormatException rather than ArgumentException on conversion err…
derekdiamond May 18, 2021
e465746
Remove unnecessary Cmake output
derekdiamond May 26, 2021
a8397f1
DataFrame I/O Tests for DateTime column
derekdiamond May 26, 2021
40e3644
Added tests for DateTime computations
derekdiamond May 26, 2021
1414728
License header in DateTimeComputation.cs
derekdiamond May 27, 2021
c262714
Delete query.json
derekdiamond May 27, 2021
74900dd
Delete VSInheritEnvironments.txt
derekdiamond May 27, 2021
d4e7ff8
Delete CMakeCCompilerId.c
derekdiamond May 27, 2021
9ebd648
Delete CMakeCache.txt
derekdiamond May 27, 2021
697bd76
Delete CMakeCCompiler.cmake
derekdiamond May 27, 2021
719c0d3
Delete CMakeCXXCompiler.cmake
derekdiamond May 27, 2021
c1e9e8e
Delete CMakeRCCompiler.cmake
derekdiamond May 27, 2021
63da48a
Delete cmake.check_cache
derekdiamond May 27, 2021
f6bc37c
Delete main.c
derekdiamond May 27, 2021
916fc9b
Delete foo.h
derekdiamond May 27, 2021
724b5d4
Delete CMakeSystem.cmake
derekdiamond May 27, 2021
d59f439
Delete CMakeCXXCompilerId.cpp
derekdiamond May 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/Microsoft.Data.Analysis/DataFrame.IO.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ private static Type GuessKind(int col, List<string[]> read)
++nbline;
continue;
}
bool dateParse = DateTime.TryParse(val, out DateTime dateResult);
if (dateParse)
{
res = DetermineType(nbline == 0, typeof(DateTime), res);
++nbline;
continue;
}

res = DetermineType(nbline == 0, typeof(string), res);
++nbline;
Expand All @@ -71,6 +78,8 @@ private static Type MaxKind(Type a, Type b)
return typeof(float);
if (a == typeof(bool) || b == typeof(bool))
return typeof(bool);
if (a == typeof(DateTime) || b == typeof(DateTime))
return typeof(DateTime);
return typeof(string);
}

Expand Down Expand Up @@ -165,6 +174,10 @@ private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int
{
ret = new UInt16DataFrameColumn(GetColumnName(columnNames, columnIndex));
}
else if (kind == typeof(DateTime))
{
ret = new PrimitiveDataFrameColumn<DateTime>(GetColumnName(columnNames, columnIndex));
}
else
{
throw new NotSupportedException(nameof(kind));
Expand Down
10 changes: 9 additions & 1 deletion src/Microsoft.Data.Analysis/DataFrame.cs
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,15 @@ public DataFrame Append(IEnumerable<object> row = null, bool inPlace = false)
}
if (value != null)
{
value = Convert.ChangeType(value, column.DataType);
try
{
value = Convert.ChangeType(value, column.DataType);
}
catch (Exception ex)
{
throw new FormatException(string.Format(Strings.ValueConversionError, column.Name, ret.Columns.RowCount + 1, ex.Message), ex);
}

if (value is null)
{
throw new ArgumentException(string.Format(Strings.MismatchedValueType, column.DataType), value.GetType().ToString());
Expand Down
314 changes: 314 additions & 0 deletions src/Microsoft.Data.Analysis/DateTimeComputation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Text;

namespace Microsoft.Data.Analysis
{
internal class DateTimeComputation : IPrimitiveColumnComputation<DateTime>
{
/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Abs(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <param name="ret">Never assigned, because the method always throws.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void All(PrimitiveColumnContainer<DateTime> column, out bool ret) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <param name="ret">Never assigned, because the method always throws.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Any(PrimitiveColumnContainer<DateTime> column, out bool ret) => throw new NotSupportedException();

/// <summary>
/// Overwrites every element of <paramref name="column"/> with the largest <see cref="DateTime"/>
/// seen so far, visiting the buffers in index order and each buffer from its first element to its last.
/// </summary>
/// <param name="column">Container whose buffers are replaced by their running-maximum counterparts.</param>
public void CumulativeMax(PrimitiveColumnContainer<DateTime> column)
{
    // Seeding with DateTime.MinValue gives the same result as seeding with the first element
    // (no DateTime compares lower), but does not index into Buffers[0] up front, so a column
    // without any data is simply left untouched instead of throwing.
    DateTime runningMax = DateTime.MinValue;

    for (int b = 0; b < column.Buffers.Count; b++)
    {
        var buffer = column.Buffers[b];
        var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(buffer);
        var mutableSpan = mutableBuffer.Span;
        var readOnlySpan = buffer.ReadOnlySpan;

        for (int i = 0; i < readOnlySpan.Length; i++)
        {
            var val = readOnlySpan[i];
            if (val > runningMax)
            {
                runningMax = val;
            }

            mutableSpan[i] = runningMax;
        }

        // Put the mutable buffer back so the values written above are what the column exposes.
        column.Buffers[b] = mutableBuffer;
    }
}

/// <summary>
/// Computes a running maximum over the selected <paramref name="rows"/> only, in the order they are
/// enumerated, and writes the running value back into each visited row.
/// </summary>
/// <param name="column">Container whose selected elements are overwritten.</param>
/// <param name="rows">
/// Row indices to visit. Each index is divided by the per-buffer capacity to find the buffer that holds it.
/// An empty sequence leaves the column untouched.
/// </param>
public void CumulativeMax(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows)
{
    long maxCapacity = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity;

    // The using block guarantees the enumerator is disposed, which a bare MoveNext loop does not.
    using (IEnumerator<long> enumerator = rows.GetEnumerator())
    {
        if (!enumerator.MoveNext())
        {
            // Nothing selected: do not touch any buffer (the column may not even have one).
            return;
        }

        // The first selected row only seeds the running maximum; its slot already holds that value.
        long row = enumerator.Current;
        int bufferIndex = (int)(row / maxCapacity);
        var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[bufferIndex]);
        // Mirror the whole-column overload, which stores the mutable buffer back into the column.
        // NOTE(review): presumably GetMutableBuffer can hand back a copy for read-only buffers, in
        // which case writes would otherwise be lost - confirm against DataFrameBuffer.
        column.Buffers[bufferIndex] = mutableBuffer;
        var span = mutableBuffer.Span;
        long minRange = checked(bufferIndex * maxCapacity);
        long maxRange = checked((bufferIndex + 1) * maxCapacity);
        DateTime runningMax = span[(int)(row - minRange)];

        while (enumerator.MoveNext())
        {
            row = enumerator.Current;
            if (row < minRange || row >= maxRange)
            {
                // The row lives in a different buffer: switch to it and recompute its index window.
                bufferIndex = (int)(row / maxCapacity);
                mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[bufferIndex]);
                column.Buffers[bufferIndex] = mutableBuffer;
                span = mutableBuffer.Span;
                minRange = checked(bufferIndex * maxCapacity);
                maxRange = checked((bufferIndex + 1) * maxCapacity);
            }

            int offset = (int)(row - minRange);
            var val = span[offset];
            if (val > runningMax)
            {
                runningMax = val;
            }

            span[offset] = runningMax;
        }
    }
}

/// <summary>
/// Overwrites every element of <paramref name="column"/> with the smallest <see cref="DateTime"/>
/// seen so far, visiting the buffers in index order and each buffer from its first element to its last.
/// </summary>
/// <param name="column">Container whose buffers are replaced by their running-minimum counterparts.</param>
public void CumulativeMin(PrimitiveColumnContainer<DateTime> column)
{
    // Seeding with DateTime.MaxValue gives the same result as seeding with the first element
    // (no DateTime compares higher), but does not index into Buffers[0] up front, so a column
    // without any data is simply left untouched instead of throwing.
    DateTime runningMin = DateTime.MaxValue;

    for (int b = 0; b < column.Buffers.Count; b++)
    {
        var buffer = column.Buffers[b];
        var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(buffer);
        var mutableSpan = mutableBuffer.Span;
        var readOnlySpan = buffer.ReadOnlySpan;

        for (int i = 0; i < readOnlySpan.Length; i++)
        {
            var val = readOnlySpan[i];
            if (val < runningMin)
            {
                runningMin = val;
            }

            mutableSpan[i] = runningMin;
        }

        // Put the mutable buffer back so the values written above are what the column exposes.
        column.Buffers[b] = mutableBuffer;
    }
}

/// <summary>
/// Computes a running minimum over the selected <paramref name="rows"/> only, in the order they are
/// enumerated, and writes the running value back into each visited row.
/// </summary>
/// <param name="column">Container whose selected elements are overwritten.</param>
/// <param name="rows">
/// Row indices to visit. Each index is divided by the per-buffer capacity to find the buffer that holds it.
/// An empty sequence leaves the column untouched.
/// </param>
public void CumulativeMin(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows)
{
    long maxCapacity = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity;

    // The using block guarantees the enumerator is disposed, which a bare MoveNext loop does not.
    using (IEnumerator<long> enumerator = rows.GetEnumerator())
    {
        if (!enumerator.MoveNext())
        {
            // Nothing selected: do not touch any buffer (the column may not even have one).
            return;
        }

        // The first selected row only seeds the running minimum; its slot already holds that value.
        long row = enumerator.Current;
        int bufferIndex = (int)(row / maxCapacity);
        var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[bufferIndex]);
        // Mirror the whole-column overload, which stores the mutable buffer back into the column.
        // NOTE(review): presumably GetMutableBuffer can hand back a copy for read-only buffers, in
        // which case writes would otherwise be lost - confirm against DataFrameBuffer.
        column.Buffers[bufferIndex] = mutableBuffer;
        var span = mutableBuffer.Span;
        long minRange = checked(bufferIndex * maxCapacity);
        long maxRange = checked((bufferIndex + 1) * maxCapacity);
        DateTime runningMin = span[(int)(row - minRange)];

        while (enumerator.MoveNext())
        {
            row = enumerator.Current;
            if (row < minRange || row >= maxRange)
            {
                // The row lives in a different buffer: switch to it and recompute its index window.
                bufferIndex = (int)(row / maxCapacity);
                mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[bufferIndex]);
                column.Buffers[bufferIndex] = mutableBuffer;
                span = mutableBuffer.Span;
                minRange = checked(bufferIndex * maxCapacity);
                maxRange = checked((bufferIndex + 1) * maxCapacity);
            }

            int offset = (int)(row - minRange);
            var val = span[offset];
            if (val < runningMin)
            {
                runningMin = val;
            }

            span[offset] = runningMin;
        }
    }
}

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void CumulativeProduct(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <param name="rows">The row selection; it is never enumerated.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void CumulativeProduct(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void CumulativeSum(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <param name="rows">The row selection; it is never enumerated.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void CumulativeSum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows) => throw new NotSupportedException();

/// <summary>
/// Scans every buffer of <paramref name="column"/> and reports the largest <see cref="DateTime"/> found.
/// </summary>
/// <param name="column">
/// Container to scan. The first element of its first buffer seeds the result, so that element must exist.
/// </param>
/// <param name="ret">Receives the largest value encountered.</param>
public void Max(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
{
    // Start from the very first stored value, then sweep all buffers (including that first value again).
    DateTime latest = column.Buffers[0].ReadOnlySpan[0];

    for (int bufferIndex = 0; bufferIndex < column.Buffers.Count; bufferIndex++)
    {
        var values = column.Buffers[bufferIndex].ReadOnlySpan;
        for (int offset = 0; offset < values.Length; offset++)
        {
            var candidate = values[offset];
            if (candidate <= latest)
            {
                continue;
            }

            latest = candidate;
        }
    }

    ret = latest;
}

/// <summary>
/// Reports the largest <see cref="DateTime"/> among the selected <paramref name="rows"/>.
/// </summary>
/// <param name="column">Container that holds the values.</param>
/// <param name="rows">
/// Row indices to inspect. Each index is divided by the per-buffer capacity to find the buffer that holds it.
/// </param>
/// <param name="ret">
/// Receives the largest selected value, or <c>default(DateTime)</c> when <paramref name="rows"/> is empty.
/// </param>
public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
{
    ret = default;
    long maxCapacity = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity;

    // The using block guarantees the enumerator is disposed, which a bare MoveNext loop does not.
    using (IEnumerator<long> enumerator = rows.GetEnumerator())
    {
        if (!enumerator.MoveNext())
        {
            // Nothing selected: keep the default result without touching any buffer.
            return;
        }

        // Seed the result from the first selected row.
        long row = enumerator.Current;
        int bufferIndex = (int)(row / maxCapacity);
        var readOnlySpan = column.Buffers[bufferIndex].ReadOnlySpan;
        long minRange = checked(bufferIndex * maxCapacity);
        long maxRange = checked((bufferIndex + 1) * maxCapacity);
        ret = readOnlySpan[(int)(row - minRange)];

        while (enumerator.MoveNext())
        {
            row = enumerator.Current;
            if (row < minRange || row >= maxRange)
            {
                // The row lives in a different buffer: switch to it and recompute its index window.
                bufferIndex = (int)(row / maxCapacity);
                readOnlySpan = column.Buffers[bufferIndex].ReadOnlySpan;
                minRange = checked(bufferIndex * maxCapacity);
                maxRange = checked((bufferIndex + 1) * maxCapacity);
            }

            var val = readOnlySpan[(int)(row - minRange)];
            if (val > ret)
            {
                ret = val;
            }
        }
    }
}

/// <summary>
/// Scans every buffer of <paramref name="column"/> and reports the smallest <see cref="DateTime"/> found.
/// </summary>
/// <param name="column">
/// Container to scan. The first element of its first buffer seeds the result, so that element must exist.
/// </param>
/// <param name="ret">Receives the smallest value encountered.</param>
public void Min(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
{
    // Start from the very first stored value, then sweep all buffers (including that first value again).
    DateTime earliest = column.Buffers[0].ReadOnlySpan[0];

    for (int bufferIndex = 0; bufferIndex < column.Buffers.Count; bufferIndex++)
    {
        var values = column.Buffers[bufferIndex].ReadOnlySpan;
        for (int offset = 0; offset < values.Length; offset++)
        {
            var candidate = values[offset];
            if (candidate >= earliest)
            {
                continue;
            }

            earliest = candidate;
        }
    }

    ret = earliest;
}

/// <summary>
/// Reports the smallest <see cref="DateTime"/> among the selected <paramref name="rows"/>.
/// </summary>
/// <param name="column">Container that holds the values.</param>
/// <param name="rows">
/// Row indices to inspect. Each index is divided by the per-buffer capacity to find the buffer that holds it.
/// </param>
/// <param name="ret">
/// Receives the smallest selected value, or <c>default(DateTime)</c> when <paramref name="rows"/> is empty.
/// </param>
public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
{
    ret = default;
    long maxCapacity = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity;

    // The using block guarantees the enumerator is disposed, which a bare MoveNext loop does not.
    using (IEnumerator<long> enumerator = rows.GetEnumerator())
    {
        if (!enumerator.MoveNext())
        {
            // Nothing selected: keep the default result without touching any buffer.
            return;
        }

        // Seed the result from the first selected row. Starting from default(DateTime) would be wrong:
        // it equals DateTime.MinValue, so no value could ever compare lower and the minimum would
        // always come back as DateTime.MinValue.
        long row = enumerator.Current;
        int bufferIndex = (int)(row / maxCapacity);
        var readOnlySpan = column.Buffers[bufferIndex].ReadOnlySpan;
        long minRange = checked(bufferIndex * maxCapacity);
        long maxRange = checked((bufferIndex + 1) * maxCapacity);
        ret = readOnlySpan[(int)(row - minRange)];

        while (enumerator.MoveNext())
        {
            row = enumerator.Current;
            if (row < minRange || row >= maxRange)
            {
                // The row lives in a different buffer: switch to it and recompute its index window.
                bufferIndex = (int)(row / maxCapacity);
                readOnlySpan = column.Buffers[bufferIndex].ReadOnlySpan;
                minRange = checked(bufferIndex * maxCapacity);
                maxRange = checked((bufferIndex + 1) * maxCapacity);
            }

            var val = readOnlySpan[(int)(row - minRange)];
            if (val < ret)
            {
                ret = val;
            }
        }
    }
}

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <param name="ret">Never assigned, because the method always throws.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Product(PrimitiveColumnContainer<DateTime> column, out DateTime ret) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <param name="rows">The row selection; it is never enumerated.</param>
/// <param name="ret">Never assigned, because the method always throws.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Product(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <param name="ret">Never assigned, because the method always throws.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Sum(PrimitiveColumnContainer<DateTime> column, out DateTime ret) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <param name="rows">The row selection; it is never enumerated.</param>
/// <param name="ret">Never assigned, because the method always throws.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Sum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret) => throw new NotSupportedException();

/// <summary>
/// Not supported for <see cref="DateTime"/> columns.
/// </summary>
/// <param name="column">The column container; it is never read.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Round(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException();

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ public static IPrimitiveColumnComputation<T> GetComputation<T>()
{
return (IPrimitiveColumnComputation<T>)new UShortComputation();
}
else if (typeof(T) == typeof(DateTime))
{
return (IPrimitiveColumnComputation<T>)new DateTimeComputation();
}

throw new NotSupportedException();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ namespace Microsoft.Data.Analysis
return (IPrimitiveColumnComputation<T>)new <#=type.ClassPrefix#>Computation();
}
<# } #>
else if (typeof(T) == typeof(DateTime))
{
return (IPrimitiveColumnComputation<T>)new DateTimeComputation();
}

throw new NotSupportedException();
}
}
Expand Down
Loading