Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

organize an example pagerank app employing the gar library (#44) #46

Merged
merged 6 commits into from
Dec 20, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ spark/target/
# docs
/docs/_build/

# examples
/examples/*/build


27 changes: 18 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ project(graph-archive LANGUAGES C CXX VERSION ${GAR_VERSION})

option(NAMESPACE "User specific namespace, default if GraphArchive" OFF)
option(BUILD_TESTS "Build unit test" OFF)
option(BUILD_EXAMPLES "Build examples" OFF)

if (NAMESPACE)
add_definitions(-DGAR_NAMESPACE=${NAMESPACE})
Expand Down Expand Up @@ -191,6 +192,23 @@ endmacro()

build_gar()

# ------------------------------------------------------------------------------
# build example
# ------------------------------------------------------------------------------
if (BUILD_EXAMPLES)
  # Boost (graph component) is only looked up when examples are requested,
  # so a plain library build does not depend on it.
  find_package(Boost REQUIRED COMPONENTS graph)

  # Collect every examples/*.cc source, as paths relative to examples/.
  file(GLOB EXAMPLE_FILES RELATIVE "${PROJECT_SOURCE_DIR}/examples" "${PROJECT_SOURCE_DIR}/examples/*.cc")
  foreach(f ${EXAMPLE_FILES})
    # Strip the last extension to get the target name,
    # e.g. "pagerank_example.cc" -> "pagerank_example".
    string(REGEX MATCH "^(.*)\\.[^.]*$" dummy "${f}")
    set(E_NAME ${CMAKE_MATCH_1})
    message(STATUS "Found example - " ${E_NAME})
    add_executable(${E_NAME} examples/${E_NAME}.cc)
    target_include_directories(${E_NAME} PRIVATE examples ${PROJECT_SOURCE_DIR}/include $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
    # Attach the Boost headers to this target only. The directory-scoped
    # include_directories() would be re-applied on every iteration and would
    # leak Boost into every target defined later in this directory.
    target_include_directories(${E_NAME} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS})
    target_link_libraries(${E_NAME} PRIVATE gar ${Boost_LIBRARIES})
  endforeach()
endif()

# ------------------------------------------------------------------------------
# Install
Expand Down Expand Up @@ -255,15 +273,6 @@ if (BUILD_TESTS)
add_test(test_arrow_chunk_reader SRCS test/test_arrow_chunk_reader.cc)
add_test(test_graph SRCS test/test_graph.cc)

add_test(test_construct_info_example SRCS test/test_example/test_construct_info_example.cc)
add_test(test_bgl_example SRCS test/test_example/test_bgl_example.cc)
add_test(test_cc_push_example SRCS test/test_example/test_cc_push_example.cc)
add_test(test_cc_stream_example SRCS test/test_example/test_cc_stream_example.cc)
add_test(test_pagerank_example SRCS test/test_example/test_pagerank_example.cc)
add_test(test_bfs_push_example SRCS test/test_example/test_bfs_push_example.cc)
add_test(test_bfs_pull_example SRCS test/test_example/test_bfs_pull_example.cc)
add_test(test_bfs_stream_example SRCS test/test_example/test_bfs_stream_example.cc)
add_test(test_bfs_father_example SRCS test/test_example/test_bfs_father_example.cc)
# enable_testing()
endif()

Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ target linked with GraphAr C++ shared library.
target_compile_features(my_example PRIVATE cxx_std_17)
target_link_libraries(my_example PRIVATE ${GAR_LIBRARIES})

Please refer to `examples/pagerank_example.cc` for details.

Contributing to GraphAr
-----------------------
Expand Down
2 changes: 1 addition & 1 deletion docs/applications/bgl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,4 @@ Finally, we could use a **VerticesBuilder** of GraphAr to write the results to n
builder.Dump();


.. _test_bgl_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_bgl_example.cc
.. _test_bgl_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/bgl_example.cc
14 changes: 7 additions & 7 deletions docs/applications/out-of-core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,16 @@ Meanwhile, BFS could be implemented in a **push**-style which only traverses the
In some cases, it is required to record the path of BFS, that is, to maintain each vertex's predecessor (also called its *father*) in the traversal tree rather than only recording the distance. An implementation of BFS that records fathers can be found at `test_bfs_father_example.cc`_.


.. _test_pagerank_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_pagerank_example.cc
.. _test_pagerank_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/pagerank_example.cc
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename these link labels to match the new file names, for example, "test_pagerank_example.cc" -> "pagerank_example.cc".


.. _test_cc_stream_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_cc_stream_example.cc
.. _test_cc_stream_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/cc_stream_example.cc

.. _test_cc_push_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_cc_push_example.cc
.. _test_cc_push_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/cc_push_example.cc

.. _test_bfs_stream_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_bfs_stream_example.cc
.. _test_bfs_stream_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/bfs_stream_example.cc

.. _test_bfs_push_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_bfs_push_example.cc
.. _test_bfs_push_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/bfs_push_example.cc

.. _test_bfs_pull_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_bfs_pull_example.cc
.. _test_bfs_pull_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/bfs_pull_example.cc

.. _test_bfs_father_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_bfs_father_example.cc
.. _test_bfs_father_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/bfs_father_example.cc
4 changes: 2 additions & 2 deletions docs/user-guide/getting-started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,6 @@ Please refer to `more examples <../applications/out-of-core.html>`_ for learning

.. _./edge/person_knows_person/ordered_by_source/offset/part0: https://github.com/GraphScope/gar-test/blob/main/ldbc_sample/csv/edge/person_knows_person/ordered_by_source/offset/part0

.. _example program: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_construct_info_example.cc
.. _example program: https://github.com/alibaba/GraphAr/blob/main/examples/construct_info_example.cc

.. _test_pagerank_example.cc: https://github.com/alibaba/GraphAr/blob/main/test/test_example/test_pagerank_example.cc
.. _test_pagerank_example.cc: https://github.com/alibaba/GraphAr/blob/main/examples/pagerank_example.cc
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"test_pagerank_example.cc" -> "pagerank_example.cc"

Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,26 @@ limitations under the License.

#include "arrow/api.h"

#include "../config.h"
#include "config.h"
#include "gar/graph.h"
#include "gar/graph_info.h"
#include "gar/reader/arrow_chunk_reader.h"
#include "gar/writer/arrow_chunk_writer.h"
#include "gar/writer/edges_builder.h"

#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>

TEST_CASE("test_bfs_with_father_example") {
int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path =
TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();

// get the person vertices of graph
std::string label = "person";
REQUIRE(graph_info.GetVertexInfo(label).status().ok());
assert(graph_info.GetVertexInfo(label).status().ok());
auto maybe_vertices =
GAR_NAMESPACE::ConstructVerticesCollection(graph_info, label);
REQUIRE(maybe_vertices.status().ok());
assert(maybe_vertices.status().ok());
auto& vertices = maybe_vertices.value();
int num_vertices = vertices.size();
std::cout << "num_vertices: " << num_vertices << std::endl;
Expand All @@ -48,7 +46,7 @@ TEST_CASE("test_bfs_with_father_example") {
auto maybe_edges = GAR_NAMESPACE::ConstructEdgesCollection(
graph_info, src_label, edge_label, dst_label,
GAR_NAMESPACE::AdjListType::unordered_by_source);
REQUIRE(!maybe_edges.has_error());
assert(!maybe_edges.has_error());
auto& edges = std::get<GAR_NAMESPACE::EdgesCollection<
GAR_NAMESPACE::AdjListType::unordered_by_source>>(maybe_edges.value());

Expand Down Expand Up @@ -93,16 +91,16 @@ TEST_CASE("test_bfs_with_father_example") {

// extend the vertex_info
auto maybe_vertex_info = graph_info.GetVertexInfo(label);
REQUIRE(maybe_vertex_info.status().ok());
assert(maybe_vertex_info.status().ok());
auto vertex_info = maybe_vertex_info.value();
auto maybe_extend_info = vertex_info.Extend(group);
REQUIRE(maybe_extend_info.status().ok());
assert(maybe_extend_info.status().ok());
auto extend_info = maybe_extend_info.value();

// dump the extended vertex info
REQUIRE(extend_info.IsValidated());
REQUIRE(extend_info.Dump().status().ok());
REQUIRE(extend_info.Save("/tmp/person-new-bfs-father.vertex.yml").ok());
assert(extend_info.IsValidated());
assert(extend_info.Dump().status().ok());
assert(extend_info.Save("/tmp/person-new-bfs-father.vertex.yml").ok());
// construct vertex property writer
GAR_NAMESPACE::VertexPropertyWriter writer(extend_info, "file:///tmp/");
// convert results to arrow::Table
Expand All @@ -114,20 +112,20 @@ TEST_CASE("test_bfs_with_father_example") {
father.name,
GAR_NAMESPACE::DataType::DataTypeToArrowDataType(father.type)));
arrow::Int32Builder array_builder1;
REQUIRE(array_builder1.Reserve(num_vertices).ok());
REQUIRE(array_builder1.AppendValues(distance).ok());
assert(array_builder1.Reserve(num_vertices).ok());
assert(array_builder1.AppendValues(distance).ok());
std::shared_ptr<arrow::Array> array1 = array_builder1.Finish().ValueOrDie();
arrays.push_back(array1);

arrow::Int64Builder array_builder2;
REQUIRE(array_builder2.Reserve(num_vertices).ok());
assert(array_builder2.Reserve(num_vertices).ok());
for (int i = 0; i < num_vertices; i++) {
if (pre[i] == -1) {
REQUIRE(array_builder2.AppendNull().ok());
assert(array_builder2.AppendNull().ok());
} else {
auto it = vertices.find(pre[i]);
auto father_id = it.property<int64_t>("id").value();
REQUIRE(array_builder2.Append(father_id).ok());
assert(array_builder2.Append(father_id).ok());
}
}
std::shared_ptr<arrow::Array> array2 = array_builder2.Finish().ValueOrDie();
Expand All @@ -136,7 +134,7 @@ TEST_CASE("test_bfs_with_father_example") {
auto schema = std::make_shared<arrow::Schema>(schema_vector);
std::shared_ptr<arrow::Table> table = arrow::Table::Make(schema, arrays);
// dump the results through writer
REQUIRE(writer.WriteTable(table, group, 0).ok());
assert(writer.WriteTable(table, group, 0).ok());

// construct a new graph
src_label = "person";
Expand All @@ -148,22 +146,22 @@ TEST_CASE("test_bfs_with_father_example") {
GAR_NAMESPACE::EdgeInfo new_edge_info(src_label, edge_label, dst_label,
edge_chunk_size, src_chunk_size,
dst_chunk_size, directed, version);
REQUIRE(new_edge_info
assert(new_edge_info
.AddAdjList(GAR_NAMESPACE::AdjListType::ordered_by_source,
GAR_NAMESPACE::FileType::CSV)
.ok());
REQUIRE(new_edge_info.IsValidated());
assert(new_edge_info.IsValidated());
// save & dump
REQUIRE(!new_edge_info.Dump().has_error());
REQUIRE(new_edge_info.Save("/tmp/person_bfs_person.edge.yml").ok());
assert(!new_edge_info.Dump().has_error());
assert(new_edge_info.Save("/tmp/person_bfs_person.edge.yml").ok());
GAR_NAMESPACE::builder::EdgesBuilder edges_builder(
new_edge_info, "file:///tmp/",
GAR_NAMESPACE::AdjListType::ordered_by_source);
for (int i = 0; i < num_vertices; i++) {
if (i == root || pre[i] == -1)
continue;
GAR_NAMESPACE::builder::Edge e(pre[i], i);
REQUIRE(edges_builder.AddEdge(e).ok());
assert(edges_builder.AddEdge(e).ok());
}
REQUIRE(edges_builder.Dump().ok());
assert(edges_builder.Dump().ok());
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,25 @@ limitations under the License.

#include "arrow/api.h"

#include "../config.h"
#include "config.h"
#include "gar/graph.h"
#include "gar/graph_info.h"
#include "gar/reader/arrow_chunk_reader.h"
#include "gar/writer/arrow_chunk_writer.h"

#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>

TEST_CASE("test_bfs_using_pull_example") {
int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path =
TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();

// construct vertices collection
std::string label = "person";
REQUIRE(graph_info.GetVertexInfo(label).status().ok());
assert(graph_info.GetVertexInfo(label).status().ok());
auto maybe_vertices =
GAR_NAMESPACE::ConstructVerticesCollection(graph_info, label);
REQUIRE(maybe_vertices.status().ok());
assert(maybe_vertices.status().ok());
auto& vertices = maybe_vertices.value();
int num_vertices = vertices.size();
std::cout << "num_vertices: " << num_vertices << std::endl;
Expand All @@ -46,7 +44,7 @@ TEST_CASE("test_bfs_using_pull_example") {
auto maybe_edges = GAR_NAMESPACE::ConstructEdgesCollection(
graph_info, src_label, edge_label, dst_label,
GAR_NAMESPACE::AdjListType::ordered_by_dest);
REQUIRE(!maybe_edges.has_error());
assert(!maybe_edges.has_error());
auto& edges = std::get<GAR_NAMESPACE::EdgesCollection<
GAR_NAMESPACE::AdjListType::ordered_by_dest>>(maybe_edges.value());

Expand Down Expand Up @@ -92,15 +90,15 @@ TEST_CASE("test_bfs_using_pull_example") {
GAR_NAMESPACE::FileType::PARQUET);
// extend the vertex_info
auto maybe_vertex_info = graph_info.GetVertexInfo(label);
REQUIRE(maybe_vertex_info.status().ok());
assert(maybe_vertex_info.status().ok());
auto vertex_info = maybe_vertex_info.value();
auto maybe_extend_info = vertex_info.Extend(group);
REQUIRE(maybe_extend_info.status().ok());
assert(maybe_extend_info.status().ok());
auto extend_info = maybe_extend_info.value();
// dump the extended vertex info
REQUIRE(extend_info.IsValidated());
REQUIRE(extend_info.Dump().status().ok());
REQUIRE(extend_info.Save("/tmp/person-new-bfs-pull.vertex.yml").ok());
assert(extend_info.IsValidated());
assert(extend_info.Dump().status().ok());
assert(extend_info.Save("/tmp/person-new-bfs-pull.vertex.yml").ok());
// construct vertex property writer
GAR_NAMESPACE::VertexPropertyWriter writer(extend_info, "/tmp/");
// convert results to arrow::Table
Expand All @@ -109,12 +107,12 @@ TEST_CASE("test_bfs_using_pull_example") {
schema_vector.push_back(arrow::field(
bfs.name, GAR_NAMESPACE::DataType::DataTypeToArrowDataType(bfs.type)));
arrow::Int32Builder array_builder;
REQUIRE(array_builder.Reserve(num_vertices).ok());
REQUIRE(array_builder.AppendValues(distance).ok());
assert(array_builder.Reserve(num_vertices).ok());
assert(array_builder.AppendValues(distance).ok());
std::shared_ptr<arrow::Array> array = array_builder.Finish().ValueOrDie();
arrays.push_back(array);
auto schema = std::make_shared<arrow::Schema>(schema_vector);
std::shared_ptr<arrow::Table> table = arrow::Table::Make(schema, arrays);
// dump the results through writer
REQUIRE(writer.WriteTable(table, group, 0).ok());
assert(writer.WriteTable(table, group, 0).ok());
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,25 @@ limitations under the License.

#include "arrow/api.h"

#include "../config.h"
#include "config.h"
#include "gar/graph.h"
#include "gar/graph_info.h"
#include "gar/reader/arrow_chunk_reader.h"
#include "gar/writer/arrow_chunk_writer.h"

#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>

TEST_CASE("test_bfs_using_push_example") {
int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path =
TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();

// construct vertices collection
std::string label = "person";
REQUIRE(graph_info.GetVertexInfo(label).status().ok());
assert(graph_info.GetVertexInfo(label).status().ok());
auto maybe_vertices =
GAR_NAMESPACE::ConstructVerticesCollection(graph_info, label);
REQUIRE(maybe_vertices.status().ok());
assert(maybe_vertices.status().ok());
auto& vertices = maybe_vertices.value();
int num_vertices = vertices.size();
std::cout << "num_vertices: " << num_vertices << std::endl;
Expand All @@ -46,7 +44,7 @@ TEST_CASE("test_bfs_using_push_example") {
auto maybe_edges = GAR_NAMESPACE::ConstructEdgesCollection(
graph_info, src_label, edge_label, dst_label,
GAR_NAMESPACE::AdjListType::ordered_by_source);
REQUIRE(!maybe_edges.has_error());
assert(!maybe_edges.has_error());
auto& edges = std::get<GAR_NAMESPACE::EdgesCollection<
GAR_NAMESPACE::AdjListType::ordered_by_source>>(maybe_edges.value());

Expand Down Expand Up @@ -91,15 +89,15 @@ TEST_CASE("test_bfs_using_push_example") {
GAR_NAMESPACE::FileType::PARQUET);
// extend the vertex_info
auto maybe_vertex_info = graph_info.GetVertexInfo(label);
REQUIRE(maybe_vertex_info.status().ok());
assert(maybe_vertex_info.status().ok());
auto vertex_info = maybe_vertex_info.value();
auto maybe_extend_info = vertex_info.Extend(group);
REQUIRE(maybe_extend_info.status().ok());
assert(maybe_extend_info.status().ok());
auto extend_info = maybe_extend_info.value();
// dump the extended vertex info
REQUIRE(extend_info.IsValidated());
REQUIRE(extend_info.Dump().status().ok());
REQUIRE(extend_info.Save("/tmp/person-new-bfs-push.vertex.yml").ok());
assert(extend_info.IsValidated());
assert(extend_info.Dump().status().ok());
assert(extend_info.Save("/tmp/person-new-bfs-push.vertex.yml").ok());
// construct vertex property writer
GAR_NAMESPACE::VertexPropertyWriter writer(extend_info, "/tmp/");
// convert results to arrow::Table
Expand All @@ -108,12 +106,12 @@ TEST_CASE("test_bfs_using_push_example") {
schema_vector.push_back(arrow::field(
bfs.name, GAR_NAMESPACE::DataType::DataTypeToArrowDataType(bfs.type)));
arrow::Int32Builder array_builder;
REQUIRE(array_builder.Reserve(num_vertices).ok());
REQUIRE(array_builder.AppendValues(distance).ok());
assert(array_builder.Reserve(num_vertices).ok());
assert(array_builder.AppendValues(distance).ok());
std::shared_ptr<arrow::Array> array = array_builder.Finish().ValueOrDie();
arrays.push_back(array);
auto schema = std::make_shared<arrow::Schema>(schema_vector);
std::shared_ptr<arrow::Table> table = arrow::Table::Make(schema, arrays);
// dump the results through writer
REQUIRE(writer.WriteTable(table, group, 0).ok());
assert(writer.WriteTable(table, group, 0).ok());
}
Loading