Skip to content

Commit fe95784

Browse files
author
Becca McHenry
committed
cleanup PR
1 parent 2c7553e commit fe95784

File tree

4 files changed

+72
-134
lines changed

4 files changed

+72
-134
lines changed

src/Microsoft.Data.Analysis/VBufferDataFrameColumn.cs

Lines changed: 34 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -71,13 +71,6 @@ protected internal override void Resize(long length)
7171
}
7272
}
7373

74-
/// <summary>
75-
/// Indicates if the value at this <paramref name="index"/> is <see langword="null" />.
76-
/// </summary>
77-
/// <param name="index">The index to look up.</param>
78-
/// <returns>A boolean value indicating the validity at this <paramref name="index"/>.</returns>
79-
public bool IsValid(long index) => NullCount == 0;
80-
8174
public void Append(VBuffer<T> value)
8275
{
8376
List<VBuffer<T>> lastBuffer = _vBuffers[_vBuffers.Count - 1];
@@ -87,8 +80,6 @@ public void Append(VBuffer<T> value)
8780
_vBuffers.Add(lastBuffer);
8881
}
8982
lastBuffer.Add(value);
90-
if (value.Length == 0) //TODO
91-
_nullCount++;
9283
Length++;
9384
}
9485

@@ -168,28 +159,50 @@ public IEnumerator<VBuffer<T>> GetEnumerator()
168159
protected override IEnumerator GetEnumeratorCore() => GetEnumerator();
169160

170161
/// <inheritdoc/>
171-
public override DataFrameColumn Sort(bool ascending = true) => throw new NotSupportedException();
172-
173-
/// <inheritdoc/>
174-
public override DataFrameColumn Clone(DataFrameColumn mapIndices = null, bool invertMapIndices = false, long numberOfNullsToAppend = 0)
162+
protected internal override void AddDataViewColumn(DataViewSchema.Builder builder)
175163
{
176-
throw new NotImplementedException();
164+
builder.AddColumn(Name, GetDataViewType());
177165
}
178166

179167
/// <inheritdoc/>
180-
public VBufferDataFrameColumn<T> FillNulls(VBuffer<T> value, bool inPlace = false)
168+
protected internal override Delegate GetDataViewGetter(DataViewRowCursor cursor)
181169
{
182-
throw new NotImplementedException();
170+
return CreateValueGetterDelegate(cursor);
183171
}
184172

185-
public override DataFrameColumn Clamp<U>(U min, U max, bool inPlace = false) => throw new NotSupportedException();
173+
private ValueGetter<VBuffer<T>> CreateValueGetterDelegate(DataViewRowCursor cursor) =>
174+
(ref VBuffer<T> value) => value = this[cursor.Position];
186175

187-
public override DataFrameColumn Filter<U>(U min, U max) => throw new NotSupportedException();
176+
public override Dictionary<long, ICollection<long>> GetGroupedOccurrences(DataFrameColumn other, out HashSet<long> otherColumnNullIndices)
177+
{
178+
return GetGroupedOccurrences<string>(other, out otherColumnNullIndices);
179+
}
188180

189-
/// <inheritdoc/>
190-
protected internal override void AddDataViewColumn(DataViewSchema.Builder builder)
181+
protected internal override Delegate GetValueGetterUsingCursor(DataViewRowCursor cursor, DataViewSchema.Column schemaColumn)
191182
{
192-
builder.AddColumn(Name, GetDataViewType());
183+
return cursor.GetGetter<VBuffer<T>>(schemaColumn);
184+
}
185+
186+
protected internal override void AddValueUsingCursor(DataViewRowCursor cursor, Delegate getter)
187+
{
188+
long row = cursor.Position;
189+
VBuffer<T> value = default;
190+
Debug.Assert(getter != null, "Excepted getter to be valid");
191+
192+
(getter as ValueGetter<VBuffer<T>>)(ref value);
193+
194+
if (Length > row)
195+
{
196+
this[row] = value;
197+
}
198+
else if (Length == row)
199+
{
200+
Append(value);
201+
}
202+
else
203+
{
204+
throw new IndexOutOfRangeException(nameof(row));
205+
}
193206
}
194207

195208
private static VectorDataViewType GetDataViewType()
@@ -249,58 +262,5 @@ private static VectorDataViewType GetDataViewType()
249262

250263
throw new NotSupportedException();
251264
}
252-
253-
/// <inheritdoc/>
254-
protected internal override Delegate GetDataViewGetter(DataViewRowCursor cursor)
255-
{
256-
return CreateValueGetterDelegate(cursor);
257-
}
258-
259-
private ValueGetter<VBuffer<T>> CreateValueGetterDelegate(DataViewRowCursor cursor) =>
260-
(ref VBuffer<T> value) => value = this[cursor.Position];
261-
262-
263-
/// <inheritdoc/>
264-
public override PrimitiveDataFrameColumn<bool> ElementwiseEquals<U>(U value)
265-
{
266-
throw new NotImplementedException();
267-
}
268-
269-
public override Dictionary<long, ICollection<long>> GetGroupedOccurrences(DataFrameColumn other, out HashSet<long> otherColumnNullIndices)
270-
{
271-
return GetGroupedOccurrences<string>(other, out otherColumnNullIndices);
272-
}
273-
274-
protected internal override Delegate GetValueGetterUsingCursor(DataViewRowCursor cursor, DataViewSchema.Column schemaColumn)
275-
{
276-
return cursor.GetGetter<VBuffer<T>>(schemaColumn);
277-
}
278-
279-
IEnumerator<VBuffer<T>> IEnumerable<VBuffer<T>>.GetEnumerator()
280-
{
281-
throw new NotImplementedException();
282-
}
283-
284-
protected internal override void AddValueUsingCursor(DataViewRowCursor cursor, Delegate getter)
285-
{
286-
long row = cursor.Position;
287-
VBuffer<T> value = default;
288-
Debug.Assert(getter != null, "Excepted getter to be valid");
289-
290-
(getter as ValueGetter<VBuffer<T>>)(ref value);
291-
292-
if (Length > row)
293-
{
294-
this[row] = value;
295-
}
296-
else if (Length == row)
297-
{
298-
Append(value);
299-
}
300-
else
301-
{
302-
throw new IndexOutOfRangeException(nameof(row));
303-
}
304-
}
305265
}
306266
}

src/Microsoft.ML.DataView/VectorType.cs

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,7 @@
55
using System;
66
using System.Collections.Immutable;
77
using System.Linq;
8-
using System.Runtime.CompilerServices;
98
using System.Text;
10-
using System.Threading;
119
using Microsoft.ML.Internal.DataView;
1210
using Microsoft.ML.Internal.Utilities;
1311

test/Microsoft.Data.Analysis.Tests/DataFrameIDataViewTests.cs

Lines changed: 31 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -9,23 +9,11 @@
99
using Microsoft.ML;
1010
using Microsoft.ML.Data;
1111
using Xunit;
12+
using Microsoft.ML.Trainers;
13+
1214

1315
namespace Microsoft.Data.Analysis.Tests
1416
{
15-
public class VectorInput
16-
{
17-
[LoadColumn(0, 1)]
18-
[VectorType(2)]
19-
public float[] FloatFeatures { get; set; }
20-
21-
[LoadColumn(3, 4)]
22-
[VectorType(2)]
23-
public Int32[] IntFeatures { get; set; }
24-
25-
[LoadColumn(5)]
26-
public bool Label { get; set; }
27-
}
28-
2917
public partial class DataFrameIDataViewTests
3018
{
3119
[Fact]
@@ -365,25 +353,6 @@ private IDataView GetASampleIDataView()
365353
return data;
366354
}
367355

368-
private IDataView GetASampleIDataViewVBuffer()
369-
{
370-
var mlContext = new MLContext();
371-
372-
// Get a small dataset as an IEnumerable.
373-
var enumerableOfData = new[]
374-
{
375-
new InputData() { Name = "Joey", FilterNext = false, Value = 1.0f },
376-
new InputData() { Name = "Chandler", FilterNext = false , Value = 2.0f},
377-
new InputData() { Name = "Ross", FilterNext = false , Value = 3.0f},
378-
new InputData() { Name = "Monica", FilterNext = true , Value = 4.0f},
379-
new InputData() { Name = "Rachel", FilterNext = true , Value = 5.0f},
380-
new InputData() { Name = "Phoebe", FilterNext = false , Value = 6.0f},
381-
};
382-
383-
IDataView data = mlContext.Data.LoadFromEnumerable(enumerableOfData);
384-
return data;
385-
}
386-
387356
private void VerifyDataFrameColumnAndDataViewColumnValues<T>(string columnName, IDataView data, DataFrame df, int maxRows = -1)
388357
{
389358
int cc = 0;
@@ -467,35 +436,46 @@ public void TestDataFrameFromIDataView_MLData_SelectColumnsAndRows()
467436
}
468437

469438
[Fact]
470-
public void VBufferTest()
439+
public void TestDataFrameFromIDataView_VBufferType()
471440
{
472441
var mlContext = new MLContext();
473442

474-
List<VectorInput> inputs = new List<VectorInput>()
443+
var inputData = new[]
475444
{
476-
new VectorInput()
477-
{
478-
FloatFeatures = new float[] {33, 44},
479-
IntFeatures = new int[] {5, 6},
480-
Label = true
445+
new {
446+
boolFeature = new bool[] {false, false},
447+
byteFeatures = new byte[] {0, 0},
448+
doubleFeatures = new double[] {0, 0},
449+
floatFeatures = new float[] {0, 0},
450+
intFeatures = new int[] {0, 0},
451+
longFeatures = new long[] {0, 0},
452+
sbyteFeatures = new sbyte[] {0, 0},
453+
shortFeatures = new short[] {0, 0},
454+
ushortFeatures = new ushort[] {0, 0},
455+
uintFeatures = new uint[] {0, 0},
456+
ulongFeatures = new ulong[] {0, 0},
481457
},
482-
new VectorInput()
483-
{
484-
FloatFeatures = new float[] {55, 66},
485-
IntFeatures = new int[] {5, 6},
486-
Label = false
458+
new {
459+
boolFeature = new bool[] {false, false},
460+
byteFeatures = new byte[] {0, 0},
461+
doubleFeatures = new double[] {0, 0},
462+
floatFeatures = new float[] {1, 1},
463+
intFeatures = new int[] {0, 0},
464+
longFeatures = new long[] {0, 0},
465+
sbyteFeatures = new sbyte[] {0, 0},
466+
shortFeatures = new short[] {0, 0},
467+
ushortFeatures = new ushort[] {0, 0},
468+
uintFeatures = new uint[] {0, 0},
469+
ulongFeatures = new ulong[] {0, 0},
487470
}
488471
};
489472

490-
var data = mlContext.Data.LoadFromEnumerable<VectorInput>(inputs);
491-
473+
var data = mlContext.Data.LoadFromEnumerable(inputData);
492474
var df = data.ToDataFrame();
493475

494-
Assert.Equal(3, df.Columns.Count);
476+
Assert.Equal(11, df.Columns.Count);
495477
Assert.Equal(2, df.Rows.Count);
496-
497-
var value = df[0, 0];
498-
var a = df.Preview();
499478
}
500479
}
501480
}
481+

test/Microsoft.ML.Core.Tests/UnitTests/TestVBuffer.cs

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1098,7 +1098,7 @@ private static void GeneratePair(Random rgen, int len, out VBuffer<float> a, out
10981098
const int cases = 8;
10991099
Contracts.Assert(cases == Enum.GetValues(typeof(GenLogic)).Length);
11001100
subcase = (GenLogic)rgen.Next(cases);
1101-
// VBufferEditor<float> bEditor;
1101+
VBufferEditor<float> bEditor;
11021102
switch (subcase)
11031103
{
11041104
case GenLogic.BothDense:
@@ -1116,21 +1116,21 @@ private static void GeneratePair(Random rgen, int len, out VBuffer<float> a, out
11161116
case GenLogic.BothSparseASameB:
11171117
GenerateVBuffer(rgen, len, rgen.Next(len), out a);
11181118
GenerateVBuffer(rgen, len, a.GetValues().Length, out b);
1119-
/*bEditor = VBufferEditor.CreateFromBuffer(ref b);
1119+
bEditor = VBufferEditor.CreateFromBuffer(ref b);
11201120
for (int i = 0; i < a.GetIndices().Length; ++i)
11211121
bEditor.Indices[i] = a.GetIndices()[i];
1122-
b = bEditor.Commit();*/
1122+
b = bEditor.Commit();
11231123
break;
11241124
case GenLogic.BothSparseASubsetB:
11251125
case GenLogic.BothSparseBSubsetA:
11261126
GenerateVBuffer(rgen, len, rgen.Next(len), out a);
11271127
GenerateVBuffer(rgen, a.GetValues().Length, rgen.Next(a.GetValues().Length), out b);
1128-
/*bEditor = VBufferEditor.Create(ref b, len, b.GetValues().Length);
1128+
bEditor = VBufferEditor.Create(ref b, len, b.GetValues().Length);
11291129
for (int i = 0; i < bEditor.Values.Length; ++i)
11301130
bEditor.Indices[i] = a.GetIndices()[bEditor.Indices[i]];
11311131
b = bEditor.Commit();
11321132
if (subcase == GenLogic.BothSparseASubsetB)
1133-
Utils.Swap(ref a, ref b);*/
1133+
Utils.Swap(ref a, ref b);
11341134
break;
11351135
case GenLogic.BothSparseAUnrelatedB:
11361136
GenerateVBuffer(rgen, len, rgen.Next(len), out a);
@@ -1143,14 +1143,14 @@ private static void GeneratePair(Random rgen, int len, out VBuffer<float> a, out
11431143
if (a.GetValues().Length != 0 && b.GetValues().Length != 0 && a.GetValues().Length != b.GetValues().Length)
11441144
{
11451145
var aEditor = VBufferEditor.CreateFromBuffer(ref a);
1146-
/*bEditor = VBufferEditor.CreateFromBuffer(ref b);
1146+
bEditor = VBufferEditor.CreateFromBuffer(ref b);
11471147
Utils.Shuffle(rgen, aEditor.Indices);
11481148
aEditor.Indices.Slice(boundary).CopyTo(bEditor.Indices);
11491149

11501150
GenericSpanSortHelper<int>.Sort(aEditor.Indices, 0, boundary);
11511151
GenericSpanSortHelper<int>.Sort(bEditor.Indices, 0, bEditor.Indices.Length);
11521152
a = aEditor.CommitTruncated(boundary);
1153-
b = bEditor.Commit();*/
1153+
b = bEditor.Commit();
11541154
}
11551155
if (rgen.Next(2) == 0)
11561156
Utils.Swap(ref a, ref b);

0 commit comments

Comments
 (0)