-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Dataframe csv datetime #5834
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dataframe csv datetime #5834
Changes from 3 commits
e8720a0
98d7ea2
2777720
63f7c0f
aa816b5
bc636d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -511,7 +511,15 @@ public DataFrame Append(IEnumerable<object> row = null, bool inPlace = false) | |
| } | ||
| if (value != null) | ||
| { | ||
| value = Convert.ChangeType(value, column.DataType); | ||
| try | ||
| { | ||
| value = Convert.ChangeType(value, column.DataType); | ||
| } | ||
| catch (Exception ex) | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be catching
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alright, I'm going to rip this try-catch out of this PR. We can add it later if we want to. I'm more interested in getting the DateTime support in |
||
| { | ||
| throw new FormatException(string.Format(Strings.ValueConversionError, column.Name, ret.Columns.RowCount + 1, ex.Message), ex); | ||
| } | ||
|
|
||
| if (value is null) | ||
| { | ||
| throw new ArgumentException(string.Format(Strings.MismatchedValueType, column.DataType), value.GetType().ToString()); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,314 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
| // See the LICENSE file in the project root for more information. | ||
|
|
||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Text; | ||
|
|
||
| namespace Microsoft.Data.Analysis | ||
| { | ||
| internal class DateTimeComputation : IPrimitiveColumnComputation<DateTime> | ||
| { | ||
| /// <summary>Absolute value is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/>.</summary> | ||
| public void Abs(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>The logical "all" reduction is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/> without assigning <paramref name="ret"/>.</summary> | ||
| public void All(PrimitiveColumnContainer<DateTime> column, out bool ret) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>The logical "any" reduction is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/> without assigning <paramref name="ret"/>.</summary> | ||
| public void Any(PrimitiveColumnContainer<DateTime> column, out bool ret) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary> | ||
| /// Rewrites every element of <paramref name="column"/> as the running maximum of the values seen so far, walking the buffers in index order. | ||
| /// </summary> | ||
| /// <param name="column">Container whose buffers are replaced, one by one, with the mutable buffers that received the results.</param> | ||
| public void CumulativeMax(PrimitiveColumnContainer<DateTime> column) | ||
| { | ||
| // Seed the running maximum with the very first element; this read is not guarded against a column without elements. | ||
| var ret = column.Buffers[0].ReadOnlySpan[0]; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if it is empty?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had the same thought when I first saw the PR, so I looked at what the other columns are doing. None of them check for empty here. It's not high priority IMO, so I'm thinking we can fix that for all the columns in a separate PR?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you log an issue for this? So we remember to do it.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| for (int b = 0; b < column.Buffers.Count; b++) | ||
| { | ||
| var buffer = column.Buffers[b]; | ||
| // Values are read from the original buffer and written to its mutable counterpart. | ||
| var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(buffer); | ||
| var mutableSpan = mutableBuffer.Span; | ||
| var readOnlySpan = buffer.ReadOnlySpan; | ||
| for (int i = 0; i < readOnlySpan.Length; i++) | ||
| { | ||
| var val = readOnlySpan[i]; | ||
|
|
||
| if (val > ret) | ||
| { | ||
| ret = val; | ||
| } | ||
|
|
||
| // Every position receives the largest value encountered up to and including itself. | ||
| mutableSpan[i] = ret; | ||
| } | ||
| // Store the written buffer back into the container so the results become part of the column. | ||
| column.Buffers[b] = mutableBuffer; | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Overwrites the elements at the given row indices with the running maximum of the values visited so far, in enumeration order. | ||
| /// </summary> | ||
| /// <param name="column">Container whose buffers are updated.</param> | ||
| /// <param name="rows">Row indices to visit; the first visited row seeds the running maximum.</param> | ||
| public void CumulativeMax(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows) | ||
| { | ||
| var ret = default(DateTime); | ||
| var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[0]); | ||
| // Store the mutable buffer back, exactly as the overload without rows does, so the writes below are not lost | ||
| // should GetMutableBuffer hand out a different instance (assumption to confirm against DataFrameBuffer). | ||
| column.Buffers[0] = mutableBuffer; | ||
| var span = mutableBuffer.Span; | ||
| long minRange = 0; | ||
| long maxRange = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity; | ||
| long maxCapacity = maxRange; | ||
| bool seeded = false; | ||
| // foreach disposes the enumerator obtained from rows, which the manual MoveNext loop never did. | ||
| foreach (long rowIndex in rows) | ||
| { | ||
| if (rowIndex < minRange || rowIndex >= maxRange) | ||
| { | ||
| // The row lives in another buffer: switch to it and recompute the index range it covers. | ||
| int bufferIndex = (int)(rowIndex / maxCapacity); | ||
| mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[bufferIndex]); | ||
| column.Buffers[bufferIndex] = mutableBuffer; | ||
| span = mutableBuffer.Span; | ||
| minRange = checked(bufferIndex * maxCapacity); | ||
| maxRange = checked((bufferIndex + 1) * maxCapacity); | ||
| } | ||
| int offset = (int)(rowIndex - minRange); | ||
| var val = span[offset]; | ||
| // The first visited value always becomes the seed; later values only replace it when they are larger. | ||
| if (!seeded || val > ret) | ||
| { | ||
| ret = val; | ||
| seeded = true; | ||
| } | ||
| span[offset] = ret; | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Rewrites every element of <paramref name="column"/> as the running minimum of the values seen so far, walking the buffers in index order. | ||
| /// The first element of the first buffer is read up front as the seed. | ||
| /// </summary> | ||
| /// <param name="column">Container whose buffers are replaced, one by one, with the mutable buffers that received the results.</param> | ||
| public void CumulativeMin(PrimitiveColumnContainer<DateTime> column) | ||
| { | ||
| var runningMin = column.Buffers[0].ReadOnlySpan[0]; | ||
| for (int bufferIndex = 0; bufferIndex < column.Buffers.Count; bufferIndex++) | ||
| { | ||
| var source = column.Buffers[bufferIndex]; | ||
| var writable = DataFrameBuffer<DateTime>.GetMutableBuffer(source); | ||
| var destination = writable.Span; | ||
| var values = source.ReadOnlySpan; | ||
| for (int offset = 0; offset < values.Length; offset++) | ||
| { | ||
| var current = values[offset]; | ||
| // Keep the smaller of the running minimum and the current value, then record it at this position. | ||
| runningMin = current < runningMin ? current : runningMin; | ||
| destination[offset] = runningMin; | ||
| } | ||
| column.Buffers[bufferIndex] = writable; | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Overwrites the elements at the given row indices with the running minimum of the values visited so far, in enumeration order. | ||
| /// </summary> | ||
| /// <param name="column">Container whose buffers are updated.</param> | ||
| /// <param name="rows">Row indices to visit; the first visited row seeds the running minimum.</param> | ||
| public void CumulativeMin(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows) | ||
| { | ||
| var ret = default(DateTime); | ||
| var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[0]); | ||
| // Store the mutable buffer back, exactly as the overload without rows does, so the writes below are not lost | ||
| // should GetMutableBuffer hand out a different instance (assumption to confirm against DataFrameBuffer). | ||
| column.Buffers[0] = mutableBuffer; | ||
| var span = mutableBuffer.Span; | ||
| long minRange = 0; | ||
| long maxRange = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity; | ||
| long maxCapacity = maxRange; | ||
| bool seeded = false; | ||
| // foreach disposes the enumerator obtained from rows, which the manual MoveNext loop never did. | ||
| foreach (long rowIndex in rows) | ||
| { | ||
| if (rowIndex < minRange || rowIndex >= maxRange) | ||
| { | ||
| // The row lives in another buffer: switch to it and recompute the index range it covers. | ||
| int bufferIndex = (int)(rowIndex / maxCapacity); | ||
| mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[bufferIndex]); | ||
| column.Buffers[bufferIndex] = mutableBuffer; | ||
| span = mutableBuffer.Span; | ||
| minRange = checked(bufferIndex * maxCapacity); | ||
| maxRange = checked((bufferIndex + 1) * maxCapacity); | ||
| } | ||
| int offset = (int)(rowIndex - minRange); | ||
| var val = span[offset]; | ||
| // The first visited value always becomes the seed; later values only replace it when they are smaller. | ||
| if (!seeded || val < ret) | ||
| { | ||
| ret = val; | ||
| seeded = true; | ||
| } | ||
| span[offset] = ret; | ||
| } | ||
| } | ||
|
|
||
| /// <summary>A cumulative product is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/>.</summary> | ||
| public void CumulativeProduct(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>A cumulative product over selected rows is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/>.</summary> | ||
| public void CumulativeProduct(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>A cumulative sum is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/>.</summary> | ||
| public void CumulativeSum(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>A cumulative sum over selected rows is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/>.</summary> | ||
| public void CumulativeSum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary> | ||
| /// Scans every buffer of <paramref name="column"/> and reports the greatest value found. | ||
| /// The first element of the first buffer is read up front as the seed. | ||
| /// </summary> | ||
| /// <param name="column">Container whose buffers are scanned.</param> | ||
| /// <param name="ret">Receives the greatest value encountered.</param> | ||
| public void Max(PrimitiveColumnContainer<DateTime> column, out DateTime ret) | ||
| { | ||
| ret = column.Buffers[0].ReadOnlySpan[0]; | ||
| for (int bufferIndex = 0; bufferIndex < column.Buffers.Count; bufferIndex++) | ||
| { | ||
| foreach (var candidate in column.Buffers[bufferIndex].ReadOnlySpan) | ||
| { | ||
| // Only a strictly greater candidate replaces the current result. | ||
| ret = candidate > ret ? candidate : ret; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Reports the greatest value among the elements of <paramref name="column"/> at the given row indices. | ||
| /// </summary> | ||
| /// <param name="column">Container whose buffers are read.</param> | ||
| /// <param name="rows">Row indices to inspect.</param> | ||
| /// <param name="ret">Receives the greatest visited value, or <c>default</c> when <paramref name="rows"/> yields nothing.</param> | ||
| public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret) | ||
| { | ||
| // default(DateTime) is the smallest representable DateTime, so it is a valid seed for a maximum. | ||
| ret = default; | ||
| var readOnlySpan = column.Buffers[0].ReadOnlySpan; | ||
| long minRange = 0; | ||
| long maxRange = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity; | ||
| long maxCapacity = maxRange; | ||
| // foreach disposes the enumerator obtained from rows, which the manual MoveNext loop never did. | ||
| foreach (long rowIndex in rows) | ||
| { | ||
| if (rowIndex < minRange || rowIndex >= maxRange) | ||
| { | ||
| // The row lives in another buffer: switch to it and recompute the index range it covers. | ||
| int bufferIndex = (int)(rowIndex / maxCapacity); | ||
| readOnlySpan = column.Buffers[bufferIndex].ReadOnlySpan; | ||
| minRange = checked(bufferIndex * maxCapacity); | ||
| maxRange = checked((bufferIndex + 1) * maxCapacity); | ||
| } | ||
| var val = readOnlySpan[(int)(rowIndex - minRange)]; | ||
| if (val > ret) | ||
| { | ||
| ret = val; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Scans every buffer of <paramref name="column"/> and reports the smallest value found. | ||
| /// The first element of the first buffer is read up front as the seed. | ||
| /// </summary> | ||
| /// <param name="column">Container whose buffers are scanned.</param> | ||
| /// <param name="ret">Receives the smallest value encountered.</param> | ||
| public void Min(PrimitiveColumnContainer<DateTime> column, out DateTime ret) | ||
| { | ||
| ret = column.Buffers[0].ReadOnlySpan[0]; | ||
| for (int bufferIndex = 0; bufferIndex < column.Buffers.Count; bufferIndex++) | ||
| { | ||
| foreach (var candidate in column.Buffers[bufferIndex].ReadOnlySpan) | ||
| { | ||
| // Only a strictly smaller candidate replaces the current result. | ||
| ret = candidate < ret ? candidate : ret; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Reports the smallest value among the elements of <paramref name="column"/> at the given row indices. | ||
| /// </summary> | ||
| /// <param name="column">Container whose buffers are read.</param> | ||
| /// <param name="rows">Row indices to inspect; the first visited row seeds the result.</param> | ||
| /// <param name="ret">Receives the smallest visited value, or <c>default</c> when <paramref name="rows"/> yields nothing.</param> | ||
| public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret) | ||
| { | ||
| ret = default; | ||
| var readOnlySpan = column.Buffers[0].ReadOnlySpan; | ||
| long minRange = 0; | ||
| long maxRange = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity; | ||
| long maxCapacity = maxRange; | ||
| bool seeded = false; | ||
| // foreach disposes the enumerator obtained from rows, which the manual MoveNext loop never did. | ||
| foreach (long rowIndex in rows) | ||
| { | ||
| if (rowIndex < minRange || rowIndex >= maxRange) | ||
| { | ||
| // The row lives in another buffer: switch to it and recompute the index range it covers. | ||
| int bufferIndex = (int)(rowIndex / maxCapacity); | ||
| readOnlySpan = column.Buffers[bufferIndex].ReadOnlySpan; | ||
| minRange = checked(bufferIndex * maxCapacity); | ||
| maxRange = checked((bufferIndex + 1) * maxCapacity); | ||
| } | ||
| var val = readOnlySpan[(int)(rowIndex - minRange)]; | ||
| // default(DateTime) is DateTime.MinValue, so comparing against it would never find a smaller value; | ||
| // the first visited value must seed the result instead. | ||
| if (!seeded || val < ret) | ||
| { | ||
| ret = val; | ||
| seeded = true; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <summary>A product is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/> without assigning <paramref name="ret"/>.</summary> | ||
| public void Product(PrimitiveColumnContainer<DateTime> column, out DateTime ret) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>A product over selected rows is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/> without assigning <paramref name="ret"/>.</summary> | ||
| public void Product(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>A sum is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/> without assigning <paramref name="ret"/>.</summary> | ||
| public void Sum(PrimitiveColumnContainer<DateTime> column, out DateTime ret) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>A sum over selected rows is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/> without assigning <paramref name="ret"/>.</summary> | ||
| public void Sum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret) => throw new NotSupportedException(); | ||
|
|
||
| /// <summary>Rounding is not supported for <see cref="DateTime"/> columns; always throws <see cref="NotSupportedException"/>.</summary> | ||
| public void Round(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException(); | ||
|
|
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How expensive is this
try-catch? It is inside a loop, so it may affect perf.