Closed
Commits
49 commits
9535117 Stash changes. Maybe runs TPC-H q1? (mbutrovich, Nov 8, 2024)
22b648d filters? (mbutrovich, Nov 8, 2024)
8e47562 Enable filter pushdown with TableParquetOptions. (mbutrovich, Nov 9, 2024)
196311e Clippy. (mbutrovich, Nov 9, 2024)
fb68558 Fix Q1. (mbutrovich, Nov 9, 2024)
0b0d6e8 add partial support for multiple parquet files (andygrove, Nov 9, 2024)
2027755 Merge branch 'native_parquet2' into df-parquet-exec (mbutrovich, Nov 9, 2024)
ef9f8f5 Merge pull request #1 from andygrove/df-parquet-exec (mbutrovich, Nov 9, 2024)
95d69fa Clippy (mbutrovich, Nov 9, 2024)
0830840 partitioning (andygrove, Nov 9, 2024)
d7396e4 merge (andygrove, Nov 9, 2024)
4e525fc fix (andygrove, Nov 9, 2024)
ef54934 fix (andygrove, Nov 9, 2024)
e52fe77 Merge pull request #2 from andygrove/df-parquet-exec (mbutrovich, Nov 9, 2024)
16033d9 upmerge (andygrove, Nov 11, 2024)
ad46821 Merge remote-tracking branch 'apache/main' into comet-parquet-exec (andygrove, Nov 11, 2024)
38e32f7 wip - CometNativeScan (#1076) (parthchandra, Nov 12, 2024)
311bc9e Revert "wip - CometNativeScan (#1076)" (andygrove, Nov 12, 2024)
bd68db8 wip - CometNativeScan (#1078) (parthchandra, Nov 13, 2024)
33d2b23 [comet-parquet-exec] Fix compilation errors in Rust tests, remove som… (andygrove, Nov 13, 2024)
eafda43 [comet-parquet-exec] Pass Spark's partitions to DF's ParquetExec (#1081) (mbutrovich, Nov 13, 2024)
786250a update some stability plans (#1083) (andygrove, Nov 14, 2024)
8a0df9d [comet-parquet-exec] Handle CometNativeScan RDD when DataSourceRDD in… (mbutrovich, Nov 15, 2024)
1cca8d6 feat: Hook DataFusion Parquet native scan with Comet execution (#1094) (viirya, Nov 19, 2024)
c3ad26e fix: Support partition values in feature branch comet-parquet-exec (#… (viirya, Nov 22, 2024)
4de51a8 fix: Use filePath instead of pathUri (#1124) (viirya, Nov 29, 2024)
29b2b77 fix: [comet-parquet-exec] Use RDD partition index (#1120) (viirya, Dec 2, 2024)
ab09337 [comet-parquet-exec] Comet parquet exec 2 (copy of Parth's PR) (#1138) (andygrove, Dec 4, 2024)
e3672f7 [comet-parquet-exec] Add unit test for reading a struct field from Pa… (andygrove, Dec 4, 2024)
e0d8077 [comet-parquet-exec] Simplify schema logic for CometNativeScan (#1142) (mbutrovich, Dec 5, 2024)
bf5a2c6 clippy (#1140) (parthchandra, Dec 5, 2024)
bd797f5 feat: [comet-parquet-exec] Schema adapter fixes (#1139) (andygrove, Dec 6, 2024)
5401de0 [comet-parquet-exec] Change path handling to fix URL decoding (#1149) (mbutrovich, Dec 9, 2024)
3131a1d Add CometNativeScanExec support to CheckParquetScan. (#1160) (mbutrovich, Dec 10, 2024)
b63570b fix: use inputRDD to get outputPartitions in CometScanExec (#1162) (parthchandra, Dec 11, 2024)
06cdd22 Revert "fix: use inputRDD to get outputPartitions in CometScanExec (#… (andygrove, Dec 12, 2024)
8563edf fix: [comet-parquet-exec] fix regressions original comet native scal … (parthchandra, Dec 13, 2024)
2686a4b feat: [comet-parquet-exec] Use Datafusion based record batch reader f… (parthchandra, Dec 17, 2024)
0fccb59 Merge branch 'upstream_main' into merge_upstream_main (mbutrovich, Dec 18, 2024)
3b0bda3 Fix redundancy in Cargo.lock. (mbutrovich, Dec 18, 2024)
1ea24dd Format, more post-merge cleanup. (mbutrovich, Dec 18, 2024)
2f4768d Compiles (mbutrovich, Dec 18, 2024)
858f0de Compiles (mbutrovich, Dec 18, 2024)
360c16d Remove empty file. (mbutrovich, Dec 18, 2024)
f8eee9e Attempt to fix JNI issue and native test build issues. (mbutrovich, Dec 18, 2024)
c13d6a0 Test Fix (parthchandra, Dec 19, 2024)
6814a99 Update planner.rs (mbutrovich, Dec 19, 2024)
a8355f0 Merge pull request #4 from parthchandra/merge_upstream_main (mbutrovich, Dec 19, 2024)
1630632 Merge remote-tracking branch 'upstream/main' into merge_upstream_main (mbutrovich, Dec 19, 2024)
@@ -272,7 +272,7 @@ public void init() throws URISyntaxException, IOException {
requestedSchema =
CometParquetReadSupport.clipParquetSchema(
requestedSchema, sparkSchema, isCaseSensitive, useFieldId, ignoreMissingIds);
- if (requestedSchema.getColumns().size() != sparkSchema.size()) {
+ if (requestedSchema.getFieldCount() != sparkSchema.size()) {
throw new IllegalArgumentException(
String.format(
"Spark schema has %d columns while " + "Parquet schema has %d columns",
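For context on the one-line change above: in parquet-mr, MessageType.getColumns() enumerates leaf (primitive) columns, while getFieldCount() counts top-level fields, which is what sparkSchema.size() reports for a Spark StructType. The two counts diverge as soon as the clipped schema contains nested types, which is presumably why the comparison was changed. A minimal sketch of the difference, using a hypothetical schema that is not taken from this PR:

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class SchemaCountDemo {
  public static void main(String[] args) {
    // One top-level primitive field plus one nested group with two leaf columns.
    MessageType schema =
        MessageTypeParser.parseMessageType(
            "message spark_schema {"
                + "  required int32 id;"
                + "  optional group point {"
                + "    required double x;"
                + "    required double y;"
                + "  }"
                + "}");

    System.out.println(schema.getColumns().size()); // 3 leaf columns: id, point.x, point.y
    System.out.println(schema.getFieldCount());     // 2 top-level fields: id, point
  }
}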
47 changes: 47 additions & 0 deletions common/src/main/java/org/apache/comet/parquet/Native.java
@@ -234,4 +234,51 @@ public static native void setPageV2(
* @param handle the handle to the native Parquet column reader
*/
public static native void closeColumnReader(long handle);

///////////// Arrow Native Parquet Reader APIs
// TODO: Add partitionValues(?), improve requiredColumns to use a projection mask that corresponds
// to arrow.
// Add batch size, datetimeRebaseModeSpec, metrics(how?)...

/**
 * Initialize a record batch reader for a PartitionedFile.
 *
 * @param filePath path to the Parquet file
 * @param fileSize total size of the file in bytes
 * @param start byte offset at which this split of the file begins
 * @param length number of bytes in this split
 * @param requiredSchema serialized schema describing the required (projected) columns
 * @return a handle to the record batch reader, used in subsequent calls
 */
public static native long initRecordBatchReader(
String filePath, long fileSize, long start, long length, byte[] requiredSchema);

// Arrow-native version of read batch.
/**
 * Read the next batch of data into memory on the native side.
 *
 * @param handle the handle to the record batch reader
 * @return the number of rows read
 */
public static native int readNextRecordBatch(long handle);

// Arrow-native equivalent of currentBatch. 'columnNum' is the index of the column in the
// record batch.
/**
 * Load the column corresponding to columnNum in the currently loaded record batch into the JVM.
 *
 * @param handle the handle to the record batch reader
 * @param columnNum index of the column within the current record batch
 * @param arrayAddr address of an Arrow C Data Interface ArrowArray struct to receive the column data
 * @param schemaAddr address of an Arrow C Data Interface ArrowSchema struct to receive the column schema
 */
public static native void currentColumnBatch(
long handle, int columnNum, long arrayAddr, long schemaAddr);

// Arrow-native version of closing the record batch reader.

/**
 * Close the record batch reader and free its resources.
 *
 * @param handle the handle to the record batch reader
 */
public static native void closeRecordBatchReader(long handle);
}
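Taken together, the four natives above form a simple lifecycle: open a reader for one PartitionedFile split, pull record batches, export each column through the Arrow C Data Interface, and close the reader. The sketch below shows one way a JVM-side caller might drive that lifecycle. It is illustrative only: the NativeScanSketch class, the readSplit method, the assumption that readNextRecordBatch returns 0 at end of input, the wire format of requiredSchema, and the use of Arrow's org.apache.arrow.c.Data importer are assumptions, not code from this PR.

import org.apache.arrow.c.ArrowArray;
import org.apache.arrow.c.ArrowSchema;
import org.apache.arrow.c.Data;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.FieldVector;

import org.apache.comet.parquet.Native;

public class NativeScanSketch {

  /** Reads every batch of one PartitionedFile split and imports its columns into the JVM. */
  public static void readSplit(
      String filePath, long fileSize, long start, long length,
      byte[] requiredSchema, int numColumns) {
    long handle =
        Native.initRecordBatchReader(filePath, fileSize, start, length, requiredSchema);
    try (BufferAllocator allocator = new RootAllocator()) {
      int rows;
      // Assumption: a return value of 0 (or less) signals that the split is exhausted.
      while ((rows = Native.readNextRecordBatch(handle)) > 0) {
        for (int col = 0; col < numColumns; col++) {
          // Have the native side export the column via the Arrow C Data Interface,
          // then import it as a Java FieldVector.
          try (ArrowArray array = ArrowArray.allocateNew(allocator);
              ArrowSchema schema = ArrowSchema.allocateNew(allocator)) {
            Native.currentColumnBatch(handle, col, array.memoryAddress(), schema.memoryAddress());
            try (FieldVector vector = Data.importVector(allocator, array, schema, null)) {
              // 'vector' now holds 'rows' values for this column; hand it to downstream operators.
            }
          }
        }
      }
    } finally {
      Native.closeRecordBatchReader(handle);
    }
  }
}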