From c752915bb4a8842423d91481def089e873712b7d Mon Sep 17 00:00:00 2001 From: John Date: Mon, 30 Oct 2023 20:39:25 +0800 Subject: [PATCH] [Doc] Provide Java's reference library, documentation for users and developers (#242) --- docs/Makefile | 17 ++- docs/developers/java-dev.rst | 97 +++++++++++++ docs/index.rst | 3 + docs/reference/java-api/index.rst | 4 + docs/user-guide/java-lib.rst | 218 ++++++++++++++++++++++++++++++ docs/user-guide/spark-lib.rst | 10 +- java/CMakeLists.txt | 12 +- java/README.md | 69 +++++++--- java/pom.xml | 41 +++++- spark/README.md | 2 +- 10 files changed, 437 insertions(+), 36 deletions(-) create mode 100644 docs/developers/java-dev.rst create mode 100644 docs/reference/java-api/index.rst create mode 100644 docs/user-guide/java-lib.rst diff --git a/docs/Makefile b/docs/Makefile index 408c17992..2d7df41b3 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -37,12 +37,21 @@ cpp-apidoc: .PHONY: spark-apidoc spark-apidoc: cd $(ROOTDIR)/spark && \ - mvn scala:doc + mvn scala:doc \ + -DoutputDirectory=$(BUILDDIR)/html/reference/spark-api + +.PHONY: java-apidoc +java-apidoc: + cd $(ROOTDIR)/java && \ + mvn -P javadoc javadoc:aggregate \ + -Dmaven.antrun.skip=true \ + -DskipTests \ + -Djavadoc.output.directory=$(BUILDDIR)/html/reference/ \ + -Djavadoc.output.destDir=java-api \ + --quiet .PHONY: html -html: cpp-apidoc spark-apidoc +html: cpp-apidoc spark-apidoc java-apidoc $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - rm -fr $(BUILDDIR)/html/reference/spark-api - cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/reference/spark-api @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
diff --git a/docs/developers/java-dev.rst b/docs/developers/java-dev.rst new file mode 100644 index 000000000..bcbd976f8 --- /dev/null +++ b/docs/developers/java-dev.rst @@ -0,0 +1,97 @@ +Java Development +================ + +Introduction +------------ + +The GraphAr Java library is based on the GraphAr C++ library and an efficient FFI +for Java and C++ called +`FastFFI <https://github.com/alibaba/fastFFI>`__. + +Source Code Level +~~~~~~~~~~~~~~~~~ + +- Interface + +- Class + +- JNI code + +- GraphAr C++ library + +If you want to use classes or functions of the GraphAr C++ library through the Java SDK, you only need to write interfaces with +annotations. After the interfaces are ready, the Java code for the interfaces and the C++ code which includes JNI +code for native methods will be automatically generated by FastFFI. For +annotation usage, please refer to +`FastFFI <https://github.com/alibaba/fastFFI>`__. + + + +Runtime Level +~~~~~~~~~~~~~ + +Interfaces and classes will be compiled to bytecode. Usually, JNI code will be compiled to bitcode as part of a +dynamic library which can be called by native methods directly. +If llvm4jni is enabled, suitable methods in the JNI code will be converted to bytecode. + +For decoupling the implementation of C++ and Java, we use a bridge dynamic library called gar-jni to connect them. It +integrates all C++ dependencies (e.g. JNI code, GraphAr C++ library and Arrow C++) +and can be called by native methods in Java directly. +Most JNI code is generated by FastFFI, but some JNI code is written by ourselves, such as JNI code for +transferring VectorSchemaRoot into arrow::Table. + +To build the bridge dynamic library, here is the main part of our CMakeLists.txt: + +..
code-block:: cmake + + # set auto-generated JNI code and handwriting JNI code as source files + file(GLOB SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-sources/annotations/*.cc" "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-test-sources/test-annotations/*.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/src/main/cpp/ffi/*.cc") + # remove auto-generated JNI code for specific method cause we have handwriting JNI code for it + list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-sources/annotations/jni_com_alibaba_graphar_arrow_ArrowTable_Static_cxx_0x58c7409.cc") + + set(LIBNAME "gar-jni") + + # find JNI related libraries + find_package(JNI REQUIRED) + include_directories(SYSTEM ${JAVA_INCLUDE_PATH}) + include_directories(SYSTEM ${JAVA_INCLUDE_PATH2}) + + # some JNI code depends on arrow + find_package(Arrow REQUIRED) + # build graphar-cpp in specific version + include(graphar-cpp) + build_graphar_cpp() + + # build the bridge JNI library + add_library(${LIBNAME} SHARED ${SOURCES}) + # include graphar-cpp headers + target_include_directories(${LIBNAME} SYSTEM BEFORE PRIVATE ${GAR_INCLUDE_DIR}) + # link graphar-cpp and arrow + target_link_libraries(${LIBNAME} ${CMAKE_JNI_LINKER_FLAGS} gar_shared) + target_link_libraries(${LIBNAME} ${CMAKE_JNI_LINKER_FLAGS} Arrow::arrow_static) + +For more about CMake usage, please refer to `CMake's official website <https://cmake.org/>`__. + +Building GraphAr Java +--------------------- + +Please refer to `GraphAr Java Library user guide <../user-guide/java-lib.html>`__. + +Code Style +---------- + +We follow `AOSP Java code +style <https://source.android.com/docs/setup/contribute/code-style>`__. To make sure the +CI code-style check passes, please verify that the check below +succeeds: + +.. code-block:: bash + + mvn spotless:check + +If there are violations, run the command below to format the code automatically: + +..
code-block:: bash + + mvn spotless:apply diff --git a/docs/index.rst b/docs/index.rst index 8e87035c4..9d767e6a4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,6 +14,7 @@ user-guide/getting-started.rst user-guide/file-format.rst user-guide/spark-lib.rst + user-guide/java-lib.rst .. toctree:: :maxdepth: 1 @@ -32,6 +33,7 @@ developers/community.rst developers/contributing.rst + developers/java-dev.rst .. toctree:: :maxdepth: 1 @@ -40,3 +42,4 @@ reference/api-reference-cpp.rst Spark API Reference + Java API Reference diff --git a/docs/reference/java-api/index.rst b/docs/reference/java-api/index.rst new file mode 100644 index 000000000..8433e1095 --- /dev/null +++ b/docs/reference/java-api/index.rst @@ -0,0 +1,4 @@ +Java API Reference (javadoc) +============================== + +Stub page for the Java reference docs; actual source is located in the java-api/ directory. \ No newline at end of file diff --git a/docs/user-guide/java-lib.rst b/docs/user-guide/java-lib.rst new file mode 100644 index 000000000..e72ed41c8 --- /dev/null +++ b/docs/user-guide/java-lib.rst @@ -0,0 +1,218 @@ +GraphAr Java Library +==================== + +Overview +-------- + +Based on an efficient FFI for Java and C++ called +`fastFFI <https://github.com/alibaba/fastFFI>`__, the GraphAr Java +library allows users to write Java for generating, loading and +transforming GAR files. It consists of several components: + +- **Information Classes**: As in the C++ library, the + information classes are implemented to construct and access the meta + information about the **graphs**, **vertices** and **edges** in + GraphAr. + +- **Writers**: The GraphAr Java writer provides a set of interfaces + that can be used to write Apache Arrow VectorSchemaRoot into GAR + files. Every time it takes a VectorSchemaRoot as the logical table + for a type of vertices or edges, then converts it to an ArrowTable, and + then dumps it to standard GAR files (CSV, ORC or Parquet files) under + the specific directory path.
+ +- **Readers**: The GraphAr Java reader provides a set of interfaces + that can be used to read GAR files. It reads a collection of vertices + or edges at a time and assembles the result into the ArrowTable. + Similar to the reader in the C++ library, it allows users to + specify the data they need, e.g., reading a single property group + instead of all properties. + +Get GraphAr Java Library +------------------------ + +Building from source +~~~~~~~~~~~~~~~~~~~~ + +Currently only installing from source is supported, but we will support +installing from Maven in the future. + +Firstly, install llvm-11. ``LLVM11_HOME`` should point to the home of +LLVM 11. In Ubuntu, it is at ``/usr/lib/llvm-11``. Basically, the build +procedure requires the following: + +- ``$LLVM11_HOME/bin/clang++`` + +- ``$LLVM11_HOME/bin/ld.lld`` + +- ``$LLVM11_HOME/lib/cmake/llvm`` + +Tips: + +- Use Ubuntu as example: + + .. code-block:: bash + + $ sudo apt-get install llvm-11 clang-11 lld-11 libclang-11-dev libz-dev -y + $ export LLVM11_HOME=/usr/lib/llvm-11 + +- Or compile from source with this + `script <https://github.com/alibaba/fastFFI/blob/main/docker/install-llvm11.sh>`__: + + .. code-block:: bash + + $ export LLVM11_HOME=/usr/lib/llvm-11 + $ export LLVM_VAR=11.0.0 + $ sudo ./install-llvm11.sh + +Make the graphar-java-library directory the current working +directory: + +.. code-block:: bash + + $ git clone https://github.com/alibaba/GraphAr.git + $ cd GraphAr + $ git submodule update --init + $ cd java + +Compile the package: + +.. code-block:: bash + + $ mvn clean install -DskipTests + +Then set GraphAr as a dependency in your Maven project: + +.. code-block:: xml + + <dependencies> + <dependency> + <groupId>com.alibaba.graphar</groupId> + <artifactId>gar-java</artifactId> + <version>0.1.0</version> + </dependency> + </dependencies> + +How to use +---------- + +Information classes +~~~~~~~~~~~~~~~~~~~ + +The Java library for GraphAr provides distinct information classes for +constructing and accessing meta information about graphs, vertices, and +edges.
These classes act as essential parameters for constructing +readers and writers, and they can be built either from the existing meta +files (in the Yaml format) or in-memory from scratch. + +To construct information from a Yaml file, please refer to the following +example code. + +.. code-block:: java + + // read graph yaml and construct information + String path = ...; // the path to the yaml file + Result graphInfoResult = GraphInfo.load(path); + if (!graphInfoResult.hasError()) { + GraphInfo graphInfo = graphInfoResult.value(); + // use information classes + StdMap vertexInfos = graphInfo.getVertexInfos(); + StdMap edgeInfos = graphInfo.getEdgeInfos(); + } + +See `test for +graphinfo `__ +for the complete example. + +Writers +~~~~~~~ + +The GraphAr Java writers wrap C++ interfaces to write arrow::Table into GraphAr +formatted files in a batch-import fashion. But arrow::Table is not easy +to build in Java. Instead, the GraphAr Java library provides a static +method to convert VectorSchemaRoot into arrow::Table. Warning: there are +known problems with this method that lead to memory leaks. We will +fix them or rewrite the writers with Apache Arrow Java. + +With the VertexWriter, users can specify a particular property group to +be written into its corresponding chunks, or choose to write all +property groups. For edge chunks, besides the meta data (edge info), the +adjList type should also be specified. The adjList/properties can be +written alone, or alternatively, all adjList, properties, and the offset +(for CSR and CSC format) chunks can be written simultaneously. + +To utilize the GAR Java writer, please refer to the following example +code. + +..
code-block:: java + + // common steps to construct VectorSchemaRoot + String uri = "file:" + ...; // data source + ScanOptions options = new ScanOptions(/*batchSize*/ 32768); + StdSharedPtr table = null; + try (BufferAllocator allocator = new RootAllocator(); + DatasetFactory datasetFactory = + new FileSystemDatasetFactory( + allocator, NativeMemoryPool.getDefault(), FileFormat.PARQUET, uri); + Dataset dataset = datasetFactory.finish(); + Scanner scanner = dataset.newScan(options); + ArrowReader reader = scanner.scanBatches()) { + while (reader.loadNextBatch()) { + try (VectorSchemaRoot root = reader.getVectorSchemaRoot()) { + // convert VectorSchemaRoot to ArrowTable + table = ArrowTable.fromVectorSchemaRoot(allocator, root, reader); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + + // construct writer object + String path = ...; // path to the edge meta (yaml) file + StdString edgeMetaFile = StdString.create(path); + StdSharedPtr edgeMeta = Yaml.loadFile(edgeMetaFile).value(); + EdgeInfo edgeInfo = EdgeInfo.load(edgeMeta).value(); + EdgeChunkWriter writer = EdgeChunkWriter.factory.create( + edgeInfo, StdString.create("/tmp/"), AdjListType.ordered_by_source); + + // write table with writer object + writer.sortAndWriteAdjListTable(table, 0, 0); // Write adj list of vertex chunk 0 to files + +See `test for +writers `__ +for the complete example. + +Readers +~~~~~~~ + +The GraphAr Java reader provides an extensive set of interfaces to read +GAR files. It reads a collection of vertices or edges at a time as an +ArrowTable. Similar to the reader in the C++ library, it allows +users to specify the data they need, e.g., a single property group. + +To utilize the GAR Java reader, please refer to the following example +code. + +..
code-block:: java + + // construct vertex chunk reader + GraphInfo graphInfo = ...; // load graph meta info + StdString label = StdString.create("person"); + StdString propertyName = StdString.create("id"); + if (graphInfo.getVertexInfo(label).hasError()) { + // throw Exception or do other things + } + PropertyGroup group = graphInfo.getVertexPropertyGroup(label, propertyName).value(); + Result maybeReader = + GrapharStaticFunctions.INSTANCE.constructVertexPropertyArrowChunkReader( + graphInfo, label, group); + // check reader's status if needed + VertexPropertyArrowChunkReader reader = maybeReader.value(); + Result> result = reader.getChunk(); + // check table's status if needed + StdSharedPtr table = result.value(); + StdPair range = reader.getRange().value(); + +See `test for +readers `__ +for the complete example. diff --git a/docs/user-guide/spark-lib.rst b/docs/user-guide/spark-lib.rst index 5d463730a..a2fde17e0 100644 --- a/docs/user-guide/spark-lib.rst +++ b/docs/user-guide/spark-lib.rst @@ -78,7 +78,7 @@ The Spark library for GraphAr provides distinct information classes for construc To construct information from a Yaml file, please refer to the following example code. -.. code:: scala +.. code-block:: scala // read graph yaml and construct information val spark = ... // the Spark session @@ -104,7 +104,7 @@ To address this issue, the GraphAr Spark library offers the IndexGenerator which To utilize IndexGenerator, please refer to the following example code. -.. code:: scala +.. code-block:: scala // generate indices for vertex DataFrame val vertex_df = ... @@ -130,7 +130,7 @@ The GraphAr Spark writer provides the necessary Spark interfaces to write DataFr To utilize the GAR Spark writer, please refer to the following example code. -.. code:: scala +.. code-block:: scala // generate the vertex index column for vertex DataFrame val vertex_df = ... 
@@ -171,7 +171,7 @@ After content has been read into the Spark DataFrame, users can leverage it to d To utilize the GAR Spark reader, please refer to the following example code. -.. code:: scala +.. code-block:: scala // construct the vertex reader val prefix = ... @@ -207,7 +207,7 @@ The Graph Reader is a helper object which enables users to read all the chunk fi The Graph Transformer is a helper object in the GraphAr Spark library, designed to assist with data transformation at the graph level. It takes two GraphInfo objects (or paths of two yaml files) as inputs: one for the source graph, and one for the destination graph. The transformer will then load data from existing GAR files for the source graph, utilizing the GraphAr Spark Reader and the meta data defined in the source GraphInfo. After reorganizing the data according to the destination GraphInfo, it generates new GAR chunk files with the GraphAr Spark Writer. -.. code:: scala +.. code-block:: scala // transform graphs by yaml paths val spark = ... 
// the Spark session diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 570178ae5..10a7ab08e 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -4,32 +4,38 @@ project(gar-java) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -std=c++17 -Wall") +# set auto-generated JNI code and handwriting JNI code as source files file(GLOB SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-sources/annotations/*.cc" "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-test-sources/test-annotations/*.cc" "${CMAKE_CURRENT_SOURCE_DIR}/src/main/cpp/ffi/*.cc") +# remove auto-generated JNI code for specific method cause we have handwriting JNI code for it list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-sources/annotations/jni_com_alibaba_graphar_arrow_ArrowTable_Static_cxx_0x58c7409.cc") set(LIBNAME "gar-jni") set(JAVA_AWT_LIBRARY NotNeeded) set(JAVA_AWT_INCLUDE_PATH NotNeeded) +# find JNI related libraries find_package(JNI REQUIRED) include_directories(SYSTEM ${JAVA_INCLUDE_PATH}) include_directories(SYSTEM ${JAVA_INCLUDE_PATH2}) -include_directories("src/main/native") -include_directories("src/test/native") - +# some JNI code depends on arrow find_package(Arrow REQUIRED) +# build graphar-cpp in specific version include(graphar-cpp) build_graphar_cpp() +# build the bridge JNI library add_library(${LIBNAME} SHARED ${SOURCES}) +# include graphar-cpp headers target_include_directories(${LIBNAME} SYSTEM BEFORE PRIVATE ${GAR_INCLUDE_DIR}) +# link graphar-cpp and arrow target_link_libraries(${LIBNAME} ${CMAKE_JNI_LINKER_FLAGS} gar_shared) target_link_libraries(${LIBNAME} ${CMAKE_JNI_LINKER_FLAGS} Arrow::arrow_static) set_target_properties(${LIBNAME} PROPERTIES LINKER_LANGUAGE CXX) +# post generated files to target directory add_custom_command(TARGET ${LIBNAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ "${CMAKE_CURRENT_SOURCE_DIR}/target/classes/") diff --git a/java/README.md b/java/README.md 
index 7420aa349..26f89783d 100644 --- a/java/README.md +++ b/java/README.md @@ -1,39 +1,72 @@ -# [WIP] GraphAr Java +# GraphAr Java This directory contains the code and build system for the GraphAr Java library which powered by [Alibaba-FastFFI](https://github.com/alibaba/fastFFI). -NOTE: This project is still under development, and we will release it soon. - ## Dependencies ### Java - JDK 8 or higher -- Maven +- Maven3+ ### C++ - CMake 3.5 or higher -- [GraphAr C++ library v0.9.0](../cpp/README.md) - LLVM 11 -Tips: -- To install LLVM 11, you can refer to our [Java CI](../.github/workflows/java.yml) on Ubuntu or compile from source with [this script](https://github.com/alibaba/fastFFI/blob/main/docker/install-llvm11.sh). - -## Build, Test and Install +## Install Only support installing from source currently, but we will support installing from Maven in the future. -```shell -git clone https://github.com/alibaba/GraphAr.git -cd GraphAr -git submodule update --init -# Install GraphAr C++ first ... -cd java -export LLVM11_HOME= # In Ubuntu, it is at /usr/lib/llvm-11 -mvn clean install +Firstly, install llvm-11. `LLVM11_HOME` should point to the home of LLVM 11. In Ubuntu, it is at `/usr/lib/llvm-11`. 
Basically, the build procedure requires the following: + +- `$LLVM11_HOME/bin/clang++` +- `$LLVM11_HOME/bin/ld.lld` +- `$LLVM11_HOME/lib/cmake/llvm` + + +Tips: +- Use Ubuntu as example: + + ```bash + $ sudo apt-get install llvm-11 clang-11 lld-11 libclang-11-dev libz-dev -y + $ export LLVM11_HOME=/usr/lib/llvm-11 + ``` + +- Or compile from source with this [script](https://github.com/alibaba/fastFFI/blob/main/docker/install-llvm11.sh): + ```bash + $ export LLVM11_HOME=/usr/lib/llvm-11 + $ export LLVM_VAR=11.0.0 + $ sudo ./install-llvm11.sh + ``` + +Make the graphar-java-library directory the current working directory: + + ```bash + $ git clone https://github.com/alibaba/GraphAr.git + $ cd GraphAr + $ git submodule update --init + $ cd java +``` + +Compile the package: + +```bash + $ mvn clean install -DskipTests +``` + +Then set GraphAr as a dependency in your Maven project: + +```xml +<dependencies> + <dependency> + <groupId>com.alibaba.graphar</groupId> + <artifactId>gar-java</artifactId> + <version>0.1.0</version> + </dependency> +</dependencies> ``` ## How to use -We will provide JavaDoc after we finished. Basically, Java's API are same as C++ library's. +Please refer to [GraphAr Java Library Documentation](https://alibaba.github.io/GraphAr/user-guide/java-lib.html).
\ No newline at end of file diff --git a/java/pom.xml b/java/pom.xml index 8c308130c..31c76384f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -6,18 +6,25 @@ com.alibaba.graphar gar-java - 1.0-SNAPSHOT + 0.1.0 jar gar-java + gar-java 8 8 UTF-8 + 3.0.0 + 3.8.1 + 3.3.0 0.1.2 13.0.0 - gar-java + 2.2.1 + 3.4.0 + gar-java-javadoc + gar-java-javadoc @@ -114,7 +121,6 @@ maven-antrun-plugin - 3.0.0 make-and-link-bitcode @@ -148,7 +154,6 @@ org.apache.maven.plugins maven-shade-plugin - 3.2.1 package @@ -227,8 +232,34 @@ 2.5.2 + org.apache.maven.plugins maven-javadoc-plugin - 2.9.1 + ${maven.javadoc.version} + + + + GarphAr + com.alibaba.graphar* + + + + **/*GenGen.java + **/Unused*.java + + ${javadoc.output.directory} + ${javadoc.output.destDir} + false + false + en_US + + + + attach-javadocs + + jar + + + maven-resources-plugin diff --git a/spark/README.md b/spark/README.md index d0f410f23..27fea9c4f 100644 --- a/spark/README.md +++ b/spark/README.md @@ -221,7 +221,7 @@ The example will import the basketballplayer graph from GraphAr to NebulaGraph a You can include GraphAr as a dependency in your maven project -```bash +```xml graphar-mvn-repo