From 3187f6f0b694633ac1b2b2720df3ca2f823fc4e2 Mon Sep 17 00:00:00 2001 From: philjdf Date: Tue, 11 Jan 2022 18:17:02 +0000 Subject: [PATCH 1/2] Fix GetEnumerator --- csharp.test/TestLogicalTypeRoundtrip.cs | 43 +++++++++++++++++++++++++ csharp/LogicalColumnReader.cs | 4 ++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/csharp.test/TestLogicalTypeRoundtrip.cs b/csharp.test/TestLogicalTypeRoundtrip.cs index 12026627..b198e1f7 100644 --- a/csharp.test/TestLogicalTypeRoundtrip.cs +++ b/csharp.test/TestLogicalTypeRoundtrip.cs @@ -288,6 +288,49 @@ public static void TestNestedStructArray([Values(Repetition.Required, Repetition fileReader.Close(); } + [Test] + public static void TestLargeArraysEnumerator() + { + CheckEnumerator(Enumerable.Range(0, 4100).ToArray()); + CheckEnumerator(Enumerable.Range(0, 4100).Select(i => new[] {$"row {i}"}).ToArray()); + } + + private static void CheckEnumerator(T[] values) + { + using var buffer = new ResizableBuffer(); + + using (var output = new BufferOutputStream(buffer)) + { + var columns = new Column[] {new Column("col0")}; + + using var fileWriter = new ParquetFileWriter(output, columns); + using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); + + using var col = rowGroupWriter.Column(0).LogicalWriter(); + col.WriteBatch(values); + + fileWriter.Close(); + } + + using (var input = new BufferReader(buffer)) + { + using var fileReader = new ParquetFileReader(input); + using var rowGroupReader = fileReader.RowGroup(0); + + using var col = rowGroupReader.Column(0).LogicalReader(); + + var enumerator = col.GetEnumerator(); + for (var i = 0; i < values.Length; i++) + { + Assert.IsTrue(enumerator.MoveNext()); + Assert.AreEqual(values[i], enumerator.Current); + } + Assert.IsFalse(enumerator.MoveNext()); + + fileReader.Close(); + } + } + [Test] public static void TestBigArrayRoundtrip() { diff --git a/csharp/LogicalColumnReader.cs b/csharp/LogicalColumnReader.cs index 906939ea..b7d38d02 100644 --- a/csharp/LogicalColumnReader.cs +++ b/csharp/LogicalColumnReader.cs @@ -44,7 +44,7 @@ internal static LogicalColumnReader Create(ColumnReader colu } } - public bool HasNext => Source.HasNext; + public abstract bool HasNext { get; } public abstract TReturn Apply(ILogicalColumnReaderVisitor visitor); @@ -153,6 +153,8 @@ private static (short definitionLevelDelta, int schemaSlice) StructSkip(ReadOnly return (definitionLevel, schemaSlice); } + public override bool HasNext => !_bufferedReader.IsEofDefinition; + public override int ReadBatch(Span destination) { short definitionLevel = 0; From b211af36a1f39e986a38c074c718466975a14c25 Mon Sep 17 00:00:00 2001 From: philjdf Date: Wed, 12 Jan 2022 00:13:52 +0000 Subject: [PATCH 2/2] Make the test clearer --- csharp.test/TestLogicalTypeRoundtrip.cs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/csharp.test/TestLogicalTypeRoundtrip.cs b/csharp.test/TestLogicalTypeRoundtrip.cs index b198e1f7..9d9ffd38 100644 --- a/csharp.test/TestLogicalTypeRoundtrip.cs +++ b/csharp.test/TestLogicalTypeRoundtrip.cs @@ -288,14 +288,19 @@ public static void TestNestedStructArray([Values(Repetition.Required, Repetition fileReader.Close(); } + /// + /// This checks that LogicalColumnReader's GetEnumerator() works correctly + /// when the column is longer than the buffer length but not an exact multiple + /// (see https://github.com/G-Research/ParquetSharp/issues/242). + /// [Test] public static void TestLargeArraysEnumerator() { - CheckEnumerator(Enumerable.Range(0, 4100).ToArray()); - CheckEnumerator(Enumerable.Range(0, 4100).Select(i => new[] {$"row {i}"}).ToArray()); + CheckEnumerator(4096, Enumerable.Range(0, 4100).ToArray()); + CheckEnumerator(4096, Enumerable.Range(0, 4100).Select(i => new[] {$"row {i}"}).ToArray()); } - private static void CheckEnumerator(T[] values) + private static void CheckEnumerator(int bufferLength, T[] values) { using var buffer = new ResizableBuffer(); @@ -306,7 +311,7 @@ private static void CheckEnumerator(T[] values) using var fileWriter = new ParquetFileWriter(output, columns); using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); - using var col = rowGroupWriter.Column(0).LogicalWriter(); + using var col = rowGroupWriter.Column(0).LogicalWriter(bufferLength); col.WriteBatch(values); fileWriter.Close(); @@ -317,7 +322,7 @@ private static void CheckEnumerator(T[] values) using var fileReader = new ParquetFileReader(input); using var rowGroupReader = fileReader.RowGroup(0); - using var col = rowGroupReader.Column(0).LogicalReader(); + using var col = rowGroupReader.Column(0).LogicalReader(bufferLength); var enumerator = col.GetEnumerator(); for (var i = 0; i < values.Length; i++)