-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FEAT][JAVA] Implement READERS and WRITERS for Java #233
Changes from 11 commits
69d8795
c6fd0f7
d30b206
dc2cad0
00dafc0
fe1ce58
20fd5ee
efd6eed
23f1da7
ab80811
4036fe0
c4e9b3f
47918f3
425060a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
.idea | ||
dependency-reduced-pom.xml | ||
target | ||
gar-java.iml | ||
cmake-build-debug |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,9 @@ project(gar-java) | |
|
||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -std=c++17 -Wall") | ||
|
||
file(GLOB SOURCES "target/generated-sources/annotations/*.cc" "target/generated-test-sources/test-annotations/*.cc") | ||
file(GLOB SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-sources/annotations/*.cc" "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-test-sources/test-annotations/*.cc" | ||
"${CMAKE_CURRENT_SOURCE_DIR}/src/main/cpp/ffi/*.cc") | ||
list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-sources/annotations/jni_com_alibaba_graphar_arrow_ArrowTable_Static_cxx_0x58c7409.cc") | ||
|
||
set(LIBNAME "gar-jni") | ||
|
||
|
@@ -16,10 +18,12 @@ include_directories(SYSTEM ${JAVA_INCLUDE_PATH2}) | |
include_directories("src/main/native") | ||
include_directories("src/test/native") | ||
|
||
find_package(Arrow REQUIRED) | ||
find_package(gar REQUIRED) | ||
|
||
add_library(${LIBNAME} SHARED ${SOURCES}) | ||
target_link_libraries(${LIBNAME} ${CMAKE_JNI_LINKER_FLAGS} gar) | ||
target_link_libraries(${LIBNAME} ${CMAKE_JNI_LINKER_FLAGS} Arrow::arrow_static) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe linking to the shared library is better? Otherwise it would take a long time to compile. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the one hand, linking Arrow dynamically leads to an error about libprotobuf: https://github.com/Thespica/GraphAr/actions/runs/6206365474/job/16850543098#step:6:9100 On the other hand, I believe linking Arrow statically just copies the Arrow library once rather than recompiling it, which costs only a little time. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, this can remain for now; please open an issue so it can be fixed in the future. |
||
|
||
set_target_properties(${LIBNAME} PROPERTIES LINKER_LANGUAGE CXX) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
<maven.compiler.target>8</maven.compiler.target> | ||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
<fastffi.revision>0.1.2</fastffi.revision> | ||
<arrow.version>13.0.0</arrow.version> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this have to be set? If the user installs another version of Arrow, will the program still work as expected? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maven will automatically download the needed jars at the specified version and manage them, so it will work. But maybe relying on Arrow 12 would be better? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If it works as expected, 13.0.0 is OK. |
||
<native.library.name>gar-java</native.library.name> | ||
|
||
</properties> | ||
|
@@ -48,24 +49,50 @@ | |
<scope>provided</scope> | ||
</dependency> | ||
|
||
|
||
<dependency> | ||
<groupId>com.alibaba.fastffi</groupId> | ||
<artifactId>llvm</artifactId> | ||
<classifier>${os.detected.classifier}</classifier> | ||
<version>${fastffi.revision}</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.apache.arrow</groupId> | ||
<artifactId>arrow-c-data</artifactId> | ||
<version>${arrow.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.apache.arrow</groupId> | ||
<artifactId>arrow-vector</artifactId> | ||
<version>${arrow.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.apache.arrow</groupId> | ||
<artifactId>arrow-memory-netty</artifactId> | ||
<version>${arrow.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.apache.arrow</groupId> | ||
<artifactId>arrow-dataset</artifactId> | ||
<version>${arrow.version}</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<version>4.13.2</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-simple</artifactId> | ||
<version>1.7.25</version> | ||
<scope>compile</scope> | ||
</dependency> | ||
</dependencies> | ||
|
||
<build> | ||
<extensions> | ||
<extensions> | ||
<extension> | ||
<groupId>kr.motd.maven</groupId> | ||
<artifactId>os-maven-plugin</artifactId> | ||
|
@@ -164,19 +191,6 @@ | |
<argLine>-Djava.library.path=${project.basedir}/target/native</argLine> | ||
</configuration> | ||
</plugin> | ||
<plugin> | ||
<groupId>com.diffplug.spotless</groupId> | ||
<artifactId>spotless-maven-plugin</artifactId> | ||
<version>2.20.0</version> | ||
<configuration> | ||
<java> | ||
<googleJavaFormat> | ||
<version>1.13.0</version> | ||
<style>AOSP</style> | ||
</googleJavaFormat> | ||
</java> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
<pluginManagement> | ||
<plugins> | ||
|
@@ -237,6 +251,19 @@ | |
<artifactId>maven-antrun-plugin</artifactId> | ||
<version>3.0.0</version> | ||
</plugin> | ||
<plugin> | ||
<groupId>com.diffplug.spotless</groupId> | ||
<artifactId>spotless-maven-plugin</artifactId> | ||
<version>2.20.0</version> | ||
<configuration> | ||
<java> | ||
<googleJavaFormat> | ||
<version>1.7</version> | ||
<style>AOSP</style> | ||
</googleJavaFormat> | ||
</java> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
</pluginManagement> | ||
</build> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#include <jni.h> | ||
#include <new> | ||
#include <iostream> | ||
#include "arrow/api.h" | ||
#include "arrow/c/bridge.h" | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
// Common Stubs | ||
|
||
JNIEXPORT | ||
jlong JNICALL Java_com_alibaba_graphar_arrow_ArrowTable_1Static_1cxx_10x58c7409_nativeFromArrowArrayAndArrowSchema(JNIEnv*, jclass, jlong rv_base, jlong arg0 /* arrayAddress0 */, jlong arg1 /* schemaAddress1 */) { | ||
auto maybeRecordBatch = arrow::ImportRecordBatch(reinterpret_cast<struct ArrowArray*>(arg0), reinterpret_cast<struct ArrowSchema*>(arg1)); | ||
auto table = arrow::Table::FromRecordBatches({maybeRecordBatch.ValueOrDie()}); | ||
return reinterpret_cast<jlong>(new((void*)rv_base) arrow::Result<std::shared_ptr<arrow::Table>>(table)); | ||
} | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
* Copyright 2022 Alibaba Group Holding Limited. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file | ||
* except in compliance with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software distributed under the | ||
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.alibaba.graphar.arrow; | ||
|
||
import static com.alibaba.graphar.util.CppClassName.ARROW_ARRAY; | ||
import static com.alibaba.graphar.util.CppHeaderName.ARROW_API_H; | ||
|
||
import com.alibaba.fastffi.CXXHead; | ||
import com.alibaba.fastffi.CXXPointer; | ||
import com.alibaba.fastffi.FFIGen; | ||
import com.alibaba.fastffi.FFITypeAlias; | ||
|
||
@FFIGen | ||
@FFITypeAlias(ARROW_ARRAY) | ||
@CXXHead(ARROW_API_H) | ||
public interface ArrowArray extends CXXPointer { | ||
long length(); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Copyright 2022 Alibaba Group Holding Limited. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file | ||
* except in compliance with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software distributed under the | ||
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.alibaba.graphar.arrow; | ||
|
||
import static com.alibaba.graphar.util.CppClassName.ARROW_RESULT; | ||
import static com.alibaba.graphar.util.CppHeaderName.ARROW_API_H; | ||
|
||
import com.alibaba.fastffi.CXXHead; | ||
import com.alibaba.fastffi.CXXReference; | ||
import com.alibaba.fastffi.FFIGen; | ||
import com.alibaba.fastffi.FFIPointer; | ||
import com.alibaba.fastffi.FFITypeAlias; | ||
|
||
@FFIGen | ||
@FFITypeAlias(ARROW_RESULT) | ||
@CXXHead(ARROW_API_H) | ||
public interface ArrowResult<T> extends FFIPointer { | ||
@CXXReference | ||
T ValueOrDie(); | ||
|
||
@CXXReference | ||
ArrowStatus status(); | ||
|
||
boolean ok(); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
/* | ||
* Copyright 2022 Alibaba Group Holding Limited. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file | ||
* except in compliance with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software distributed under the | ||
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.alibaba.graphar.arrow; | ||
|
||
import static com.alibaba.graphar.util.CppClassName.ARROW_STATUS; | ||
import static com.alibaba.graphar.util.CppHeaderName.ARROW_API_H; | ||
|
||
import com.alibaba.fastffi.CXXHead; | ||
import com.alibaba.fastffi.CXXReference; | ||
import com.alibaba.fastffi.FFIGen; | ||
import com.alibaba.fastffi.FFIPointer; | ||
import com.alibaba.fastffi.FFITypeAlias; | ||
import com.alibaba.graphar.stdcxx.StdString; | ||
|
||
@FFIGen | ||
@FFITypeAlias(ARROW_STATUS) | ||
@CXXHead(ARROW_API_H) | ||
public interface ArrowStatus extends FFIPointer { | ||
@CXXReference | ||
StdString message(); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Copyright 2022 Alibaba Group Holding Limited. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file | ||
* except in compliance with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software distributed under the | ||
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.alibaba.graphar.arrow; | ||
|
||
import static com.alibaba.graphar.util.CppClassName.ARROW_TABLE; | ||
import static com.alibaba.graphar.util.CppHeaderName.ARROW_API_H; | ||
|
||
import com.alibaba.fastffi.CXXHead; | ||
import com.alibaba.fastffi.CXXPointer; | ||
import com.alibaba.fastffi.CXXValue; | ||
import com.alibaba.fastffi.FFIGen; | ||
import com.alibaba.fastffi.FFILibrary; | ||
import com.alibaba.fastffi.FFITypeAlias; | ||
import com.alibaba.fastffi.FFITypeFactory; | ||
import com.alibaba.graphar.stdcxx.StdSharedPtr; | ||
import com.alibaba.graphar.stdcxx.StdString; | ||
import org.apache.arrow.c.ArrowArray; | ||
import org.apache.arrow.c.ArrowSchema; | ||
import org.apache.arrow.c.Data; | ||
import org.apache.arrow.memory.BufferAllocator; | ||
import org.apache.arrow.vector.VectorSchemaRoot; | ||
import org.apache.arrow.vector.dictionary.DictionaryProvider; | ||
|
||
@FFIGen | ||
@FFITypeAlias(ARROW_TABLE) | ||
@CXXHead(ARROW_API_H) | ||
public interface ArrowTable extends CXXPointer { | ||
|
||
/** | ||
* Convert VectorSchemaRoot to C++ arrow::Table | ||
* | ||
* @param allocator Buffer allocator for allocating C data interface fields | ||
* @param vsr Vector schema root to export | ||
* @param provider Dictionary provider for dictionary encoded vectors (optional) | ||
* @return StdSharedPtr<ArrowTable> | ||
*/ | ||
static StdSharedPtr<ArrowTable> fromVectorSchemaRoot( | ||
BufferAllocator allocator, VectorSchemaRoot vsr, DictionaryProvider provider) { | ||
ArrowResult<StdSharedPtr<ArrowTable>> maybeTable = null; | ||
org.apache.arrow.c.ArrowArray arrowArray = ArrowArray.allocateNew(allocator); | ||
org.apache.arrow.c.ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator); | ||
Data.exportVectorSchemaRoot(allocator, vsr, provider, arrowArray, arrowSchema); | ||
maybeTable = | ||
Static.INSTANCE.fromArrowArrayAndArrowSchema( | ||
arrowArray.memoryAddress(), arrowSchema.memoryAddress()); | ||
if (!maybeTable.ok()) { | ||
throw new RuntimeException( | ||
"Error when convert C RecordBatch to C++ Table: " | ||
+ maybeTable.status().message().toJavaString()); | ||
} | ||
return maybeTable.ValueOrDie(); | ||
} | ||
|
||
long num_rows(); | ||
|
||
@CXXValue | ||
StdString ToString(); | ||
|
||
@FFIGen | ||
@FFILibrary(value = "arrow", namespace = "arrow") | ||
interface Static { | ||
Static INSTANCE = FFITypeFactory.getLibrary(ArrowTable.Static.class); | ||
|
||
/** | ||
* Convert C ArrowArray and ArrowSchema to C++ arrow::Table with JNI wrote manually | ||
* | ||
* @param arrayAddress Address of C ArrowArray | ||
* @param schemaAddress Address of C ArrowSchema | ||
* @return StdSharedPtr<ArrowTable> wrapped by ArrowResult | ||
*/ | ||
@CXXValue | ||
ArrowResult<StdSharedPtr<ArrowTable>> fromArrowArrayAndArrowSchema( | ||
@FFITypeAlias("struct ArrowArray*") long arrayAddress, | ||
@FFITypeAlias("struct ArrowSchema*") long schemaAddress); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why change to Java 8?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't used any features newer than Java 8, and Java 8 has better compatibility with other graph systems that rely on Java 8. So maybe we should assume the environment is Java 8, since higher Java versions are compatible with lower ones.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spark relies on Java 11, so to unify CI and avoid confusion, I think 11 is better.