Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ if (MSVC)

add_compile_options("/MP") # Compile files in parallel.
add_compile_options("/WX") # Treat warnings as errors.
# See https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/
add_compile_options("/Zc:__cplusplus") # Enable standard __cplusplus macro.

endif ()

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ In the 5.0.X versions, reading nested structures was introduced. However, nestin

Building ParquetSharp for Windows requires the following dependencies:
- Visual Studio 2019 (16.4 or higher)
- Apache Arrow (5.0.0)
- Apache Arrow (6.0.1)

For building Arrow (including Parquet) and its dependencies, we recommend using Microsoft's [vcpkg](https://github.com/Microsoft/vcpkg). Note that the Windows build needs to be done in a Visual Studio x64 Native Tools Command Prompt for the build script to succeed.

Expand Down
6 changes: 6 additions & 0 deletions cpp/Enums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ namespace
static_assert(Encoding::RLE_DICTIONARY == 8);
static_assert(Encoding::BYTE_STREAM_SPLIT == 9);
static_assert(Encoding::UNDEFINED == 10);
static_assert(Encoding::UNKNOWN == 999);

static_assert(LogicalType::Type::UNDEFINED == 0);
static_assert(LogicalType::Type::STRING == 1);
Expand All @@ -57,7 +58,12 @@ namespace
static_assert(ParquetCipher::AES_GCM_CTR_V1 == 1);

static_assert(ParquetVersion::PARQUET_1_0 == 0);
ARROW_SUPPRESS_DEPRECATION_WARNING
static_assert(ParquetVersion::PARQUET_2_0 == 1);
ARROW_UNSUPPRESS_DEPRECATION_WARNING
static_assert(ParquetVersion::PARQUET_2_4 == 2);
static_assert(ParquetVersion::PARQUET_2_6 == 3);
static_assert(ParquetVersion::PARQUET_2_LATEST == 3);

static_assert(Type::BOOLEAN == 0);
static_assert(Type::INT32 == 1);
Expand Down
59 changes: 29 additions & 30 deletions csharp.test/TestLogicalTypeRoundtrip.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,17 +154,14 @@ private static void AssertReadRoundtrip(int rowsPerBatch, int readBufferLength,
if (expected.HasStatistics)
{
Assert.AreEqual(expected.HasMinMax, statistics?.HasMinMax);
//Assert.AreEqual(expected.NullCount, statistics?.NullCount);
//Assert.AreEqual(expected.NumValues, statistics?.NumValues);
Assert.AreEqual(expected.NullCount, statistics?.NullCount);
Assert.AreEqual(expected.NumValues, statistics?.NumValues);
Assert.AreEqual(expected.PhysicalType, statistics?.PhysicalType);

// BUG Don't check for decimal until https://issues.apache.org/jira/browse/ARROW-6149 is fixed.
var buggy = expected.LogicalType is DecimalLogicalType;

if (expected.HasMinMax && !buggy)
if (expected.HasMinMax)
{
Assert.AreEqual(expected.Min, expected.Converter(statistics!.MinUntyped));
Assert.AreEqual(expected.Max, expected.Converter(statistics!.MaxUntyped));
Assert.AreEqual(expected.Min, expected.Converter(statistics!.MinUntyped, descr));
Assert.AreEqual(expected.Max, expected.Converter(statistics!.MaxUntyped, descr));
}
}
else
Expand Down Expand Up @@ -648,7 +645,8 @@ private static ExpectedColumn[] CreateExpectedColumns()
Values = Enumerable.Range(0, NumRows).Select(i => ((decimal) i * i * i) / 1000 - 10).ToArray(),
Min = -10m,
Max = ((NumRows - 1m) * (NumRows - 1m) * (NumRows - 1m)) / 1000 - 10,
Converter = v => LogicalRead.ToDecimal((FixedLenByteArray) v, 3)
Converter = (v, descr) => LogicalRead.ToDecimal(
(FixedLenByteArray) v, Decimal128.GetScaleMultiplier(descr.TypeScale))
},
new ExpectedColumn
{
Expand All @@ -662,7 +660,8 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 10) / 11,
Min = -9.999m,
Max = ((NumRows - 1m) * (NumRows - 1m) * (NumRows - 1m)) / 1000 - 10,
Converter = v => LogicalRead.ToDecimal((FixedLenByteArray) v, 3)
Converter = (v, descr) => LogicalRead.ToDecimal(
(FixedLenByteArray) v, Decimal128.GetScaleMultiplier(descr.TypeScale))
},
new ExpectedColumn
{
Expand All @@ -674,7 +673,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
Values = Enumerable.Range(0, NumRows).Select(i => new Guid(i, 0x1234, 0x5678, 0x9A, 0xBC, 0xDE, 0xF0, 0x12, 0x34, 0x56, 0x7F)).ToArray(),
Min = new Guid(0, 0x1234, 0x5678, 0x9A, 0xBC, 0xDE, 0xF0, 0x12, 0x34, 0x56, 0x7F),
Max = new Guid(NumRows - 1, 0x1234, 0x5678, 0x9A, 0xBC, 0xDE, 0xF0, 0x12, 0x34, 0x56, 0x7F),
Converter = v => LogicalRead.ToUuid((FixedLenByteArray) v)
Converter = (v, _) => LogicalRead.ToUuid((FixedLenByteArray) v)
},
new ExpectedColumn
{
Expand All @@ -688,7 +687,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 10) / 11,
Min = new Guid(1, 0x1234, 0x5678, 0x9A, 0xBC, 0xDE, 0xF0, 0x12, 0x34, 0x56, 0x7F),
Max = new Guid(NumRows - 1, 0x1234, 0x5678, 0x9A, 0xBC, 0xDE, 0xF0, 0x12, 0x34, 0x56, 0x7F),
Converter = v => LogicalRead.ToUuid((FixedLenByteArray) v)
Converter = (v, _) => LogicalRead.ToUuid((FixedLenByteArray) v)
},
new ExpectedColumn
{
Expand Down Expand Up @@ -718,7 +717,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
Values = Enumerable.Range(0, NumRows).Select(i => new DateTime(2018, 01, 01) + TimeSpan.FromHours(i)).ToArray(),
Min = new DateTime(2018, 01, 01),
Max = new DateTime(2018, 01, 01) + TimeSpan.FromHours(NumRows - 1),
Converter = v => LogicalRead.ToDateTimeMicros((long) v)
Converter = (v, _) => LogicalRead.ToDateTimeMicros((long) v)
},
new ExpectedColumn
{
Expand All @@ -730,7 +729,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 10) / 11,
Min = new DateTime(2018, 01, 01) + TimeSpan.FromHours(1),
Max = new DateTime(2018, 01, 01) + TimeSpan.FromHours(NumRows - 1),
Converter = v => LogicalRead.ToDateTimeMicros((long) v)
Converter = (v, _) => LogicalRead.ToDateTimeMicros((long) v)
},
new ExpectedColumn
{
Expand All @@ -741,7 +740,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
Values = Enumerable.Range(0, NumRows).Select(i => new DateTime(2018, 01, 01) + TimeSpan.FromHours(i)).ToArray(),
Min = new DateTime(2018, 01, 01),
Max = new DateTime(2018, 01, 01) + TimeSpan.FromHours(NumRows - 1),
Converter = v => LogicalRead.ToDateTimeMillis((long) v)
Converter = (v, _) => LogicalRead.ToDateTimeMillis((long) v)
},
new ExpectedColumn
{
Expand All @@ -754,7 +753,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 10) / 11,
Min = new DateTime(2018, 01, 01) + TimeSpan.FromHours(1),
Max = new DateTime(2018, 01, 01) + TimeSpan.FromHours(NumRows - 1),
Converter = v => LogicalRead.ToDateTimeMillis((long) v)
Converter = (v, _) => LogicalRead.ToDateTimeMillis((long) v)
},
new ExpectedColumn
{
Expand All @@ -764,7 +763,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
Values = Enumerable.Range(0, NumRows).Select(i => new DateTimeNanos(new DateTime(2018, 01, 01) + TimeSpan.FromHours(i))).ToArray(),
Min = new DateTimeNanos(new DateTime(2018, 01, 01)),
Max = new DateTimeNanos(new DateTime(2018, 01, 01) + TimeSpan.FromHours(NumRows - 1)),
Converter = v => new DateTimeNanos((long) v)
Converter = (v, _) => new DateTimeNanos((long) v)
},
new ExpectedColumn
{
Expand All @@ -776,7 +775,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 10) / 11,
Min = new DateTimeNanos(new DateTime(2018, 01, 01) + TimeSpan.FromHours(1)),
Max = new DateTimeNanos(new DateTime(2018, 01, 01) + TimeSpan.FromHours(NumRows - 1)),
Converter = v => new DateTimeNanos((long) v)
Converter = (v, _) => new DateTimeNanos((long) v)
},
new ExpectedColumn
{
Expand All @@ -786,7 +785,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
Values = Enumerable.Range(0, NumRows).Select(i => TimeSpan.FromHours(-13) + TimeSpan.FromHours(i)).ToArray(),
Min = TimeSpan.FromHours(-13),
Max = TimeSpan.FromHours(-13 + NumRows - 1),
Converter = v => LogicalRead.ToTimeSpanMicros((long) v)
Converter = (v, _) => LogicalRead.ToTimeSpanMicros((long) v)
},
new ExpectedColumn
{
Expand All @@ -798,7 +797,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 10) / 11,
Min = TimeSpan.FromHours(-13 + 1),
Max = TimeSpan.FromHours(-13 + NumRows - 1),
Converter = v => LogicalRead.ToTimeSpanMicros((long) v)
Converter = (v, _) => LogicalRead.ToTimeSpanMicros((long) v)
},
new ExpectedColumn
{
Expand All @@ -809,7 +808,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
Values = Enumerable.Range(0, NumRows).Select(i => TimeSpan.FromHours(-13) + TimeSpan.FromHours(i)).ToArray(),
Min = TimeSpan.FromHours(-13),
Max = TimeSpan.FromHours(-13 + NumRows - 1),
Converter = v => LogicalRead.ToTimeSpanMillis((int) v)
Converter = (v, _) => LogicalRead.ToTimeSpanMillis((int) v)
},
new ExpectedColumn
{
Expand All @@ -822,7 +821,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 10) / 11,
Min = TimeSpan.FromHours(-13 + 1),
Max = TimeSpan.FromHours(-13 + NumRows - 1),
Converter = v => LogicalRead.ToTimeSpanMillis((int) v)
Converter = (v, _) => LogicalRead.ToTimeSpanMillis((int) v)
},
new ExpectedColumn
{
Expand All @@ -832,7 +831,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
Values = Enumerable.Range(0, NumRows).Select(i => new TimeSpanNanos(TimeSpan.FromHours(-13) + TimeSpan.FromHours(i))).ToArray(),
Min = new TimeSpanNanos(TimeSpan.FromHours(-13)),
Max = new TimeSpanNanos(TimeSpan.FromHours(-13 + NumRows - 1)),
Converter = v => new TimeSpanNanos((long) v)
Converter = (v, _) => new TimeSpanNanos((long) v)
},
new ExpectedColumn
{
Expand All @@ -844,7 +843,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 10) / 11,
Min = new TimeSpanNanos(TimeSpan.FromHours(-13 + 1)),
Max = new TimeSpanNanos(TimeSpan.FromHours(-13 + NumRows - 1)),
Converter = v => new TimeSpanNanos((long) v)
Converter = (v, _) => new TimeSpanNanos((long) v)
},
new ExpectedColumn
{
Expand All @@ -856,7 +855,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 17) / 18,
Min = "",
Max = "Hello, 98!",
Converter = v => LogicalRead.ToString((ByteArray) v)
Converter = (v, _) => LogicalRead.ToString((ByteArray) v)
},
new ExpectedColumn
{
Expand All @@ -869,7 +868,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 8) / 9,
Min = "{ \"id\", 1 }",
Max = "{ \"id\", 98 }",
Converter = v => LogicalRead.ToString((ByteArray) v)
Converter = (v, _) => LogicalRead.ToString((ByteArray) v)
},
new ExpectedColumn
{
Expand All @@ -880,7 +879,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 5) / 6,
Min = new byte[0],
Max = BitConverter.GetBytes(NumRows - 1),
Converter = v => LogicalRead.ToByteArray((ByteArray) v)
Converter = (v, _) => LogicalRead.ToByteArray((ByteArray) v)
},
new ExpectedColumn
{
Expand All @@ -893,7 +892,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = NumRows - (NumRows + 2) / 3,
Min = BitConverter.GetBytes(1),
Max = BitConverter.GetBytes(NumRows - 1),
Converter = v => LogicalRead.ToByteArray((ByteArray) v)
Converter = (v, _) => LogicalRead.ToByteArray((ByteArray) v)
},
new ExpectedColumn
{
Expand Down Expand Up @@ -1006,7 +1005,7 @@ private static ExpectedColumn[] CreateExpectedColumns()
NumValues = (NumRows / 3 + 1) * 3,
Min = BitConverter.GetBytes(0),
Max = BitConverter.GetBytes(252),
Converter = v => LogicalRead.ToByteArray((ByteArray) v)
Converter = (v, _) => LogicalRead.ToByteArray((ByteArray) v)
}
};
}
Expand All @@ -1027,7 +1026,7 @@ private sealed class ExpectedColumn
public long NullCount;
public long NumValues = NumRows;

public Func<object, object> Converter = v => v;
public Func<object, ColumnDescriptor, object> Converter = (v, _) => v;
}

private const int NumRows = 119;
Expand Down
4 changes: 2 additions & 2 deletions csharp.test/TestPhysicalTypeRoundtrip.cs
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,14 @@ private static void AssertReadRoundtrip(ResizableBuffer buffer, ExpectedColumn[]

var numRows = expectedColumns.First().Values.Length;

Assert.AreEqual("parquet-cpp-arrow version 5.0.0", fileMetaData.CreatedBy);
Assert.AreEqual("parquet-cpp-arrow version 6.0.1", fileMetaData.CreatedBy);
Assert.AreEqual(new Dictionary<string, string> {{"case", "Test"}, {"Awesome", "true"}}, fileMetaData.KeyValueMetadata);
Assert.AreEqual(expectedColumns.Length, fileMetaData.NumColumns);
Assert.AreEqual(numRows, fileMetaData.NumRows);
Assert.AreEqual(1, fileMetaData.NumRowGroups);
Assert.AreEqual(1 + expectedColumns.Length, fileMetaData.NumSchemaElements);
Assert.AreEqual(ParquetVersion.PARQUET_1_0, fileMetaData.Version);
Assert.AreEqual("parquet-cpp-arrow version 5.0.0", fileMetaData.WriterVersion.ToString());
Assert.AreEqual("parquet-cpp-arrow version 6.0.1", fileMetaData.WriterVersion.ToString());

using var rowGroupReader = fileReader.RowGroup(0);
var rowGroupMetaData = rowGroupReader.MetaData;
Expand Down
2 changes: 1 addition & 1 deletion csharp.test/TestWriterProperties.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void TestDefaultProperties()
{
var p = WriterProperties.GetDefaultWriterProperties();

Assert.AreEqual("parquet-cpp-arrow version 5.0.0", p.CreatedBy);
Assert.AreEqual("parquet-cpp-arrow version 6.0.1", p.CreatedBy);
Assert.AreEqual(Compression.Uncompressed, p.Compression(new ColumnPath("anypath")));
Assert.AreEqual(int.MinValue, p.CompressionLevel(new ColumnPath("anypath")));
Assert.AreEqual(1024 * 1024, p.DataPageSize);
Expand Down
3 changes: 2 additions & 1 deletion csharp/Encoding.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public enum Encoding
DeltaByteArray = 7,
RleDictionary = 8,
ByteStreamSplit = 9,
Undefined = 10
Undefined = 10,
Unknown = 999
}
}
2 changes: 1 addition & 1 deletion csharp/ParquetSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<NoWarn>1591;</NoWarn>
<Version>5.0.0</Version>
<Version>6.0.1-beta1</Version>
<Company>G-Research</Company>
<Authors>G-Research</Authors>
<Product>ParquetSharp</Product>
Expand Down
5 changes: 4 additions & 1 deletion csharp/ParquetVersion.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ public enum ParquetVersion
{
// ReSharper disable InconsistentNaming
PARQUET_1_0 = 0,
PARQUET_2_0 = 1
PARQUET_2_0 = 1,
PARQUET_2_4 = 2,
PARQUET_2_6 = 3,
PARQUET_2_LATEST = 3
// ReSharper restore InconsistentNaming
}
}
2 changes: 1 addition & 1 deletion vcpkg_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
https://github.com/microsoft/vcpkg.git 77c4e0cb087b3cecb5525dacd1fad18a0bb23573
https://github.com/microsoft/vcpkg.git 0fcee5a2ab497b851f6dbf55dc052bcff2b05072