diff --git a/cpp/examples/high_level_reader_example.cc b/cpp/examples/high_level_reader_example.cc
new file mode 100644
index 000000000..ed426b294
--- /dev/null
+++ b/cpp/examples/high_level_reader_example.cc
@@ -0,0 +1,138 @@
+/** Copyright 2022 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+
+#include "arrow/api.h"
+#include "arrow/filesystem/api.h"
+
+#include "./config.h"
+#include "gar/graph.h"
+
+// Builds the vertices collection for the "person" label and prints the first
+// 10 vertices (through the iterator and through the dereferenced vertex), the
+// last vertex, the vertex found by internal id 100, and the total count.
+void vertices_collection(const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct vertices collection
+  std::string label = "person";
+  auto maybe_vertices_collection =
+      GAR_NAMESPACE::ConstructVerticesCollection(graph_info, label);
+  ASSERT(!maybe_vertices_collection.has_error());
+  auto vertices = maybe_vertices_collection.value();
+
+  // use vertices collection
+  auto count = 0;
+  // iterate through vertices collection
+  for (auto it = vertices->begin(); it != vertices->end(); ++it) {
+    count++;
+    // print the first 10 vertices
+    if (count > 10) {
+      continue;
+    }
+    // access data through iterator directly
+    std::cout << it.id() << ", id=" << it.property<int64_t>("id").value()
+              << ", firstName=" << it.property<std::string>("firstName").value()
+              << "; ";
+    // access data through vertex
+    auto vertex = *it;
+    std::cout << vertex.id()
+              << ", id=" << vertex.property<int64_t>("id").value()
+              << ", firstName="
+              << vertex.property<std::string>("firstName").value() << std::endl;
+  }
+  // add operator+ for iterator
+  auto it_last = vertices->begin() + (count - 1);
+  std::cout << "the last vertex: " << std::endl;
+  std::cout << it_last.id()
+            << ", id=" << it_last.property<int64_t>("id").value()
+            << ", firstName="
+            << it_last.property<std::string>("firstName").value() << std::endl;
+  // find the vertex with internal id = 100
+  auto it_find = vertices->find(100);
+  std::cout << "the vertex with internal id = 100: " << std::endl;
+  std::cout << it_find.id()
+            << ", id=" << it_find.property<int64_t>("id").value()
+            << ", firstName="
+            << it_find.property<std::string>("firstName").value() << std::endl;
+  // count
+  ASSERT(count == vertices->size());
+  std::cout << "vertex_count=" << count << std::endl;
+}
+
+// Builds the "person-knows-person" edges collection (ordered by source) and
+// prints the first 10 edges, the edges reached from find_src(100, ...) by
+// repeatedly calling next_src(), and the total edge count.
+void edges_collection(const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct edges collection
+  std::string src_label = "person", edge_label = "knows", dst_label = "person";
+  auto expect = GAR_NAMESPACE::ConstructEdgesCollection(
+      graph_info, src_label, edge_label, dst_label,
+      GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(!expect.has_error());
+  auto edges = expect.value();
+
+  // use edges collection
+  auto begin = edges->begin();
+  auto end = edges->end();
+  size_t count = 0;
+  // iterate through edges collection
+  for (auto it = begin; it != end; ++it) {
+    count++;
+    // print the first 10 edges
+    if (count > 10) {
+      continue;
+    }
+    // access data through iterator directly
+    std::cout << "src=" << it.source() << ", dst=" << it.destination() << "; ";
+    // access data through edge
+    auto edge = *it;
+    std::cout << "src=" << edge.source() << ", dst=" << edge.destination()
+              << ", creationDate="
+              << edge.property<std::string>("creationDate").value()
+              << std::endl;
+  }
+  // find the first edge with source = 100
+  auto it_find = edges->find_src(100, begin);
+  std::cout << "the edge with source = 100: " << std::endl;
+  do {
+    std::cout << "src=" << it_find.source() << ", dst=" << it_find.destination()
+              << ", creationDate="
+              << it_find.property<std::string>("creationDate").value()
+              << std::endl;
+  } while (it_find.next_src());
+
+  // count
+  ASSERT(count == edges->size());
+  std::cout << "edge_count=" << count << std::endl;
+}
+
+// Loads the ldbc_sample graph info from TEST_DATA_DIR and runs both
+// collection examples.
+int main(int argc, char* argv[]) {
+  // read file and construct graph info
+  std::string path =
+      TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
+  auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();
+
+  // vertices collection
+  std::cout << "Vertices collection" << std::endl;
+  std::cout << "-------------------" << std::endl;
+  vertices_collection(graph_info);
+  std::cout << std::endl;
+
+  // edges collection
+  std::cout << "Edges collection" << std::endl;
+  std::cout << "----------------" << std::endl;
+  edges_collection(graph_info);
+}
diff --git a/cpp/examples/high_level_writer_example.cc b/cpp/examples/high_level_writer_example.cc
new file mode 100644
index 000000000..104987cb0
--- /dev/null
+++ b/cpp/examples/high_level_writer_example.cc
@@ -0,0 +1,125 @@
+/** Copyright 2022 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+
+#include "arrow/api.h"
+#include "arrow/filesystem/api.h"
+
+#include "./config.h"
+#include "gar/writer/edges_builder.h"
+#include "gar/writer/vertices_builder.h"
+
+// Builds three "person" vertices with a VerticesBuilder configured from
+// person.vertex.yml, dumps them under /tmp/ with strong validation enabled,
+// and then clears the builder.
+void vertices_builder() {
+  // construct vertices builder
+  std::string vertex_meta_file =
+      TEST_DATA_DIR + "/ldbc_sample/parquet/" + "person.vertex.yml";
+  auto vertex_meta = GAR_NAMESPACE::Yaml::LoadFile(vertex_meta_file).value();
+  auto vertex_info = GAR_NAMESPACE::VertexInfo::Load(vertex_meta).value();
+  GAR_NAMESPACE::IdType start_index = 0;
+  GAR_NAMESPACE::builder::VerticesBuilder builder(vertex_info, "/tmp/",
+                                                  start_index);
+
+  // set validate level
+  builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate);
+
+  // prepare vertex data
+  int vertex_count = 3;
+  std::vector<std::string> property_names = {"id", "firstName", "lastName",
+                                             "gender"};
+  std::vector<int64_t> id = {0, 1, 2};
+  std::vector<std::string> firstName = {"John", "Jane", "Alice"};
+  std::vector<std::string> lastName = {"Smith", "Doe", "Wonderland"};
+  std::vector<std::string> gender = {"male", "female", "female"};
+
+  // add vertices
+  for (int i = 0; i < vertex_count; i++) {
+    GAR_NAMESPACE::builder::Vertex v;
+    v.AddProperty(property_names[0], id[i]);
+    v.AddProperty(property_names[1], firstName[i]);
+    v.AddProperty(property_names[2], lastName[i]);
+    v.AddProperty(property_names[3], gender[i]);
+    ASSERT(builder.AddVertex(v).ok());
+  }
+
+  // dump
+  ASSERT(builder.GetNum() == vertex_count);
+  std::cout << "vertex_count=" << builder.GetNum() << std::endl;
+  ASSERT(builder.Dump().ok());
+  std::cout << "dump vertices collection successfully!" << std::endl;
+
+  // clear vertices
+  builder.Clear();
+  ASSERT(builder.GetNum() == 0);
+}
+
+// Builds four "person-knows-person" edges with an EdgesBuilder
+// (ordered_by_dest) configured from person_knows_person.edge.yml, dumps them
+// under /tmp/ with strong validation enabled, and then clears the builder.
+void edges_builder() {
+  // construct edges builder
+  std::string edge_meta_file =
+      TEST_DATA_DIR + "/ldbc_sample/parquet/" + "person_knows_person.edge.yml";
+  auto edge_meta = GAR_NAMESPACE::Yaml::LoadFile(edge_meta_file).value();
+  auto edge_info = GAR_NAMESPACE::EdgeInfo::Load(edge_meta).value();
+  auto vertex_count = 3;
+  GAR_NAMESPACE::builder::EdgesBuilder builder(
+      edge_info, "/tmp/", GAR_NAMESPACE::AdjListType::ordered_by_dest,
+      vertex_count);
+
+  // set validate level
+  builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate);
+
+  // prepare edge data
+  int edge_count = 4;
+  std::vector<std::string> property_names = {"creationDate"};
+  std::vector<int64_t> src = {1, 0, 0, 2};
+  std::vector<int64_t> dst = {0, 1, 2, 1};
+  std::vector<std::string> creationDate = {"2010-01-01", "2011-01-01",
+                                           "2012-01-01", "2013-01-01"};
+
+  // add edges
+  for (int i = 0; i < edge_count; i++) {
+    GAR_NAMESPACE::builder::Edge e(src[i], dst[i]);
+    e.AddProperty(property_names[0], creationDate[i]);
+    ASSERT(builder.AddEdge(e).ok());
+  }
+
+  // dump
+  ASSERT(builder.GetNum() == edge_count);
+  std::cout << "edge_count=" << builder.GetNum() << std::endl;
+  ASSERT(builder.Dump().ok());
+  std::cout << "dump edges collection successfully!" << std::endl;
+
+  // clear edges
+  builder.Clear();
+  ASSERT(builder.GetNum() == 0);
+}
+
+// Runs the vertices builder example followed by the edges builder example.
+int main(int argc, char* argv[]) {
+  // vertices builder
+  std::cout << "Vertices builder" << std::endl;
+  std::cout << "-------------------" << std::endl;
+  vertices_builder();
+  std::cout << std::endl;
+
+  // edges builder
+  std::cout << "Edges builder" << std::endl;
+  std::cout << "----------------" << std::endl;
+  edges_builder();
+}
diff --git a/cpp/examples/low_level_reader_example.cc b/cpp/examples/low_level_reader_example.cc
new file mode 100644
index 000000000..11948b76b
--- /dev/null
+++ b/cpp/examples/low_level_reader_example.cc
@@ -0,0 +1,164 @@
+/** Copyright 2022 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+
+#include "arrow/api.h"
+#include "arrow/filesystem/api.h"
+
+#include "./config.h"
+#include "gar/reader/chunk_info_reader.h"
+
+// Prints the chunk file paths reported by a VertexPropertyChunkInfoReader for
+// the property group holding "id" of "person": the first chunk, the chunk for
+// vertex id 520, the chunk after it, and finally the chunk count.
+void vertex_property_chunk_info_reader(
+    const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct reader
+  std::string label = "person", property_name = "id";
+  ASSERT(graph_info.GetVertexInfo(label).status().ok());
+  auto maybe_group = graph_info.GetVertexPropertyGroup(label, property_name);
+  ASSERT(!maybe_group.has_error());
+  const GAR_NAMESPACE::PropertyGroup& group = maybe_group.value();
+  auto maybe_reader = GAR_NAMESPACE::ConstructVertexPropertyChunkInfoReader(
+      graph_info, label, group);
+  ASSERT(!maybe_reader.has_error());
+  GAR_NAMESPACE::VertexPropertyChunkInfoReader& reader = maybe_reader.value();
+
+  // use reader
+  auto maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  std::string chunk_path = maybe_chunk_path.value();
+  std::cout << "path of first vertex property chunk: " << chunk_path
+            << std::endl;
+  // seek vertex id
+  ASSERT(reader.seek(520).ok());
+  maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  chunk_path = maybe_chunk_path.value();
+  std::cout << "path of vertex property chunk for vertex id 520: " << chunk_path
+            << std::endl;
+  // next chunk
+  ASSERT(reader.next_chunk().ok());
+  maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  chunk_path = maybe_chunk_path.value();
+  std::cout << "path of next vertex property chunk: " << chunk_path
+            << std::endl;
+  std::cout << "vertex property chunk number: " << reader.GetChunkNum()
+            << std::endl;
+}
+
+// Prints the chunk file paths reported by an AdjListChunkInfoReader for
+// "person-knows-person" (ordered by source): the first chunk, the chunk after
+// seeking to source vertex 100, and the chunk that follows.
+void adj_list_chunk_info_reader(const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct reader
+  std::string src_label = "person", edge_label = "knows", dst_label = "person";
+  auto maybe_reader = GAR_NAMESPACE::ConstructAdjListChunkInfoReader(
+      graph_info, src_label, edge_label, dst_label,
+      GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(maybe_reader.status().ok());
+  auto& reader = maybe_reader.value();
+
+  // use reader
+  auto maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  auto chunk_path = maybe_chunk_path.value();
+  std::cout << "path of first adj_list chunk: " << chunk_path << std::endl;
+  // seek src
+  ASSERT(reader.seek_src(100).ok());
+  maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  chunk_path = maybe_chunk_path.value();
+  std::cout
+      << "path of first adj_list chunk for outgoing edges of vertex id 100: "
+      << chunk_path << std::endl;
+  // next chunk
+  ASSERT(reader.next_chunk().ok());
+  maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  chunk_path = maybe_chunk_path.value();
+  std::cout << "path of next adj_list chunk: " << chunk_path << std::endl;
+}
+
+// Prints the chunk file paths reported by an AdjListPropertyChunkInfoReader
+// for the "creationDate" property group of "person-knows-person" (ordered by
+// source): the first chunk, the chunk for source 100, and the next chunk.
+void adj_list_property_chunk_info_reader(
+    const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct reader
+  std::string src_label = "person", edge_label = "knows", dst_label = "person",
+              property_name = "creationDate";
+
+  auto maybe_group = graph_info.GetEdgePropertyGroup(
+      src_label, edge_label, dst_label, property_name,
+      GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(maybe_group.status().ok());
+  auto group = maybe_group.value();
+  auto maybe_property_reader =
+      GAR_NAMESPACE::ConstructAdjListPropertyChunkInfoReader(
+          graph_info, src_label, edge_label, dst_label, group,
+          GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(maybe_property_reader.status().ok());
+  auto reader = maybe_property_reader.value();
+
+  // use reader
+  auto maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  auto chunk_path = maybe_chunk_path.value();
+  std::cout << "path of first adj_list property chunk: " << chunk_path
+            << std::endl;
+  // seek src
+  ASSERT(reader.seek_src(100).ok());
+  maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  chunk_path = maybe_chunk_path.value();
+  std::cout << "path of first adj_list property chunk for outgoing edges of "
+               "vertex id 100: "
+            << chunk_path << std::endl;
+  // next chunk
+  ASSERT(reader.next_chunk().ok());
+  maybe_chunk_path = reader.GetChunk();
+  ASSERT(maybe_chunk_path.status().ok());
+  chunk_path = maybe_chunk_path.value();
+  std::cout << "path of next adj_list property chunk: " << chunk_path
+            << std::endl;
+}
+
+// Loads the ldbc_sample graph info from TEST_DATA_DIR and runs the three
+// chunk info reader examples.
+int main(int argc, char* argv[]) {
+  // read file and construct graph info
+  std::string path =
+      TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
+  auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();
+
+  // vertex property chunk info reader
+  std::cout << "Vertex property chunk info reader" << std::endl;
+  std::cout << "---------------------------------" << std::endl;
+  vertex_property_chunk_info_reader(graph_info);
+  std::cout << std::endl;
+
+  // adj_list chunk info reader
+  std::cout << "Adj_list chunk info reader" << std::endl;
+  std::cout << "--------------------------" << std::endl;
+  adj_list_chunk_info_reader(graph_info);
+  std::cout << std::endl;
+
+  // adj_list property chunk info reader
+  std::cout << "Adj_list property chunk info reader" << std::endl;
+  std::cout << "-----------------------------------" << std::endl;
+  adj_list_property_chunk_info_reader(graph_info);
+}
diff --git a/cpp/examples/mid_level_reader_example.cc b/cpp/examples/mid_level_reader_example.cc
new file mode 100644
index 000000000..e92c2f026
--- /dev/null
+++ b/cpp/examples/mid_level_reader_example.cc
@@ -0,0 +1,259 @@
+/** Copyright 2022 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+
+#include "arrow/api.h"
+#include "arrow/filesystem/api.h"
+
+#include "./config.h"
+#include "gar/reader/arrow_chunk_reader.h"
+#include "gar/util/expression.h"
+
+// Reads "person" vertex property chunks (the group holding "gender") as Arrow
+// tables: prints the first chunk, the ranges after seeking to vertex id 100
+// and after moving to the next chunk, then reads a chunk through a second
+// reader with a gender == "female" filter and a firstName/lastName selection.
+void vertex_property_chunk_reader(const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct reader
+  std::string label = "person", property_name = "gender";
+  ASSERT(graph_info.GetVertexInfo(label).status().ok());
+  auto maybe_group = graph_info.GetVertexPropertyGroup(label, property_name);
+  ASSERT(maybe_group.status().ok());
+  auto group = maybe_group.value();
+  auto maybe_reader = GAR_NAMESPACE::ConstructVertexPropertyArrowChunkReader(
+      graph_info, label, group);
+  ASSERT(maybe_reader.status().ok());
+  auto reader = maybe_reader.value();
+
+  // use reader
+  auto result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  auto range = reader.GetRange().value();
+  std::cout << "chunk number: " << reader.GetChunkNum() << std::endl;
+  std::cout << "range of first vertex property chunk: " << range.first << " "
+            << range.second << std::endl;
+  auto table = result.value();
+  std::cout << "rows number of first vertex property chunk: "
+            << table->num_rows() << std::endl;
+  std::cout << "schema of first vertex property chunk: " << std::endl
+            << table->schema()->ToString() << std::endl;
+  // seek vertex id
+  ASSERT(reader.seek(100).ok());
+  result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  range = reader.GetRange().value();
+  std::cout << "range of vertex property chunk for vertex id 100: "
+            << range.first << " " << range.second << std::endl;
+  // next chunk
+  ASSERT(reader.next_chunk().ok());
+  result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  range = reader.GetRange().value();
+  std::cout << "range of next vertex property chunk: " << range.first << " "
+            << range.second << std::endl;
+
+  // reader with filter pushdown
+  auto filter = GAR_NAMESPACE::_Equal(GAR_NAMESPACE::_Property("gender"),
+                                      GAR_NAMESPACE::_Literal("female"));
+  std::vector<std::string> expected_cols{"firstName", "lastName"};
+  auto maybe_filter_reader =
+      GAR_NAMESPACE::ConstructVertexPropertyArrowChunkReader(graph_info, label,
+                                                             group);
+  ASSERT(maybe_filter_reader.status().ok());
+  auto filter_reader = maybe_filter_reader.value();
+  filter_reader.Filter(filter);
+  filter_reader.Select(expected_cols);
+  auto filter_result = filter_reader.GetChunk();
+  ASSERT(!filter_result.has_error());
+  auto filter_table = filter_result.value();
+  std::cout << "rows number of first filtered vertex property chunk: "
+            << filter_table->num_rows() << std::endl;
+  std::cout << "schema of first filtered vertex property chunk: " << std::endl
+            << filter_table->schema()->ToString() << std::endl;
+}
+
+// Reads "person-knows-person" adjacency list chunks (ordered by source) as
+// Arrow tables: the first chunk, the chunk after seeking to source vertex
+// 100, and the chunk that follows.
+void adj_list_chunk_reader(const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct reader
+  std::string src_label = "person", edge_label = "knows", dst_label = "person";
+  ASSERT(
+      graph_info.GetEdgeInfo(src_label, edge_label, dst_label).status().ok());
+  auto maybe_reader = GAR_NAMESPACE::ConstructAdjListArrowChunkReader(
+      graph_info, src_label, edge_label, dst_label,
+      GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(maybe_reader.status().ok());
+
+  // use reader
+  auto reader = maybe_reader.value();
+  auto result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  auto table = result.value();
+  std::cout << "rows number of first adj_list chunk: " << table->num_rows()
+            << std::endl;
+  std::cout << "schema of first adj_list chunk: " << std::endl
+            << table->schema()->ToString() << std::endl;
+  // seek src
+  ASSERT(reader.seek_src(100).ok());
+  result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  table = result.value();
+  std::cout << "rows number of first adj_list chunk for outgoing edges of "
+               "vertex id 100: "
+            << table->num_rows() << std::endl;
+  // next chunk
+  ASSERT(reader.next_chunk().ok());
+  result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  table = result.value();
+  std::cout << "rows number of next adj_list chunk: " << table->num_rows()
+            << std::endl;
+}
+
+// Reads "creationDate" edge property chunks of "person-knows-person" (ordered
+// by source) as Arrow tables: the first chunk, the chunk after seeking to
+// source vertex 100, the next chunk, and finally a chunk through a second
+// reader with a filter expression and a "creationDate" column selection.
+void adj_list_property_chunk_reader(
+    const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct reader
+  std::string src_label = "person", edge_label = "knows", dst_label = "person",
+              property_name = "creationDate";
+  auto maybe_group = graph_info.GetEdgePropertyGroup(
+      src_label, edge_label, dst_label, property_name,
+      GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(maybe_group.status().ok());
+  auto group = maybe_group.value();
+  auto maybe_reader = GAR_NAMESPACE::ConstructAdjListPropertyArrowChunkReader(
+      graph_info, src_label, edge_label, dst_label, group,
+      GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(maybe_reader.status().ok());
+  auto reader = maybe_reader.value();
+
+  // use reader
+  auto result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  auto table = result.value();
+  std::cout << "rows number of first adj_list property chunk: "
+            << table->num_rows() << std::endl;
+  std::cout << "schema of first adj_list property chunk: " << std::endl
+            << table->schema()->ToString() << std::endl;
+  // seek src
+  ASSERT(reader.seek_src(100).ok());
+  result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  table = result.value();
+  std::cout << "rows number of first adj_list property chunk for outgoing "
+               "edges of vertex id 100: "
+            << table->num_rows() << std::endl;
+  // next chunk
+  ASSERT(reader.next_chunk().ok());
+  result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  table = result.value();
+  std::cout << "rows number of next adj_list property chunk: "
+            << table->num_rows() << std::endl;
+
+  // reader with filter pushdown
+  auto expr1 = GAR_NAMESPACE::_LessThan(
+      GAR_NAMESPACE::_Literal("2012-06-02T04:30:44.526+0000"),
+      GAR_NAMESPACE::_Property(property_name));
+  auto expr2 = GAR_NAMESPACE::_Equal(GAR_NAMESPACE::_Property(property_name),
+                                     GAR_NAMESPACE::_Property(property_name));
+  auto filter = GAR_NAMESPACE::_And(expr1, expr2);
+  std::vector<std::string> expected_cols{"creationDate"};
+  auto maybe_filter_reader =
+      GAR_NAMESPACE::ConstructAdjListPropertyArrowChunkReader(
+          graph_info, src_label, edge_label, dst_label, group,
+          GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(maybe_filter_reader.status().ok());
+  auto filter_reader = maybe_filter_reader.value();
+  filter_reader.Filter(filter);
+  filter_reader.Select(expected_cols);
+  auto filter_result = filter_reader.GetChunk();
+  ASSERT(!filter_result.has_error());
+  auto filter_table = filter_result.value();
+  std::cout << "rows number of first filtered adj_list property chunk: "
+            << filter_table->num_rows() << std::endl;
+}
+
+// Reads "person-knows-person" adjacency list offset chunks (ordered by
+// source) as Arrow arrays: the first chunk, the next chunk, and the chunk for
+// vertex id 900.
+void adj_list_offset_chunk_reader(const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct reader
+  std::string src_label = "person", edge_label = "knows", dst_label = "person";
+  ASSERT(
+      graph_info.GetEdgeInfo(src_label, edge_label, dst_label).status().ok());
+  auto maybe_reader = GAR_NAMESPACE::ConstructAdjListOffsetArrowChunkReader(
+      graph_info, src_label, edge_label, dst_label,
+      GAR_NAMESPACE::AdjListType::ordered_by_source);
+  ASSERT(maybe_reader.status().ok());
+  auto reader = maybe_reader.value();
+
+  // use reader
+  auto result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  auto array = result.value();
+  std::cout << "length of first adj_list offset chunk: " << array->length()
+            << std::endl;
+  // next chunk
+  ASSERT(reader.next_chunk().ok());
+  result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  array = result.value();
+  std::cout << "length of next adj_list offset chunk: " << array->length()
+            << std::endl;
+  // seek vertex id
+  ASSERT(reader.seek(900).ok());
+  result = reader.GetChunk();
+  ASSERT(!result.has_error());
+  array = result.value();
+  std::cout << "length of adj_list offset chunk for vertex id 900: "
+            << array->length() << std::endl;
+}
+
+// Loads the ldbc_sample graph info from TEST_DATA_DIR and runs the four chunk
+// reader examples.
+int main(int argc, char* argv[]) {
+  // read file and construct graph info
+  std::string path =
+      TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
+  auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();
+
+  // vertex property chunk reader
+  std::cout << "Vertex property chunk reader" << std::endl;
+  std::cout << "----------------------------" << std::endl;
+  vertex_property_chunk_reader(graph_info);
+  std::cout << std::endl;
+
+  // adj_list chunk reader
+  std::cout << "Adj_list chunk reader" << std::endl;
+  std::cout << "---------------------" << std::endl;
+  adj_list_chunk_reader(graph_info);
+  std::cout << std::endl;
+
+  // adj_list property chunk reader
+  std::cout << "Adj_list property chunk reader" << std::endl;
+  std::cout << "------------------------------" << std::endl;
+  adj_list_property_chunk_reader(graph_info);
+  std::cout << std::endl;
+
+  // adj_list offset chunk reader
+  std::cout << "Adj_list offset chunk reader" << std::endl;
+  std::cout << "----------------------------" << std::endl;
+  adj_list_offset_chunk_reader(graph_info);
+}
diff --git a/cpp/examples/mid_level_writer_example.cc b/cpp/examples/mid_level_writer_example.cc
new file mode 100644
index 000000000..61ce0ab66
--- /dev/null
+++ b/cpp/examples/mid_level_writer_example.cc
@@ -0,0 +1,214 @@
+/** Copyright 2022 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+
+#include "arrow/api.h"
+#include "arrow/filesystem/api.h"
+#include "arrow/result.h"
+
+#include "./config.h"
+#include "gar/writer/arrow_chunk_writer.h"
+
+// Builds an in-memory Arrow table of three "person" vertices with the columns
+// id, firstName, lastName and gender; returns the Arrow error on failure.
+arrow::Result<std::shared_ptr<arrow::Table>> generate_vertex_table() {
+  // property "id"
+  arrow::Int64Builder i64builder;
+  ARROW_RETURN_NOT_OK(i64builder.AppendValues({0, 1, 2}));
+  std::shared_ptr<arrow::Array> i64array;
+  ARROW_RETURN_NOT_OK(i64builder.Finish(&i64array));
+
+  // property "firstName"
+  arrow::StringBuilder strbuilder;
+  ARROW_RETURN_NOT_OK(strbuilder.Append("John"));
+  ARROW_RETURN_NOT_OK(strbuilder.Append("Jane"));
+  ARROW_RETURN_NOT_OK(strbuilder.Append("Alice"));
+  std::shared_ptr<arrow::Array> strarray;
+  ARROW_RETURN_NOT_OK(strbuilder.Finish(&strarray));
+
+  // property "lastName"
+  arrow::StringBuilder strbuilder2;
+  ARROW_RETURN_NOT_OK(strbuilder2.Append("Smith"));
+  ARROW_RETURN_NOT_OK(strbuilder2.Append("Doe"));
+  ARROW_RETURN_NOT_OK(strbuilder2.Append("Wonderland"));
+  std::shared_ptr<arrow::Array> strarray2;
+  ARROW_RETURN_NOT_OK(strbuilder2.Finish(&strarray2));
+
+  // property "gender"
+  arrow::StringBuilder strbuilder3;
+  ARROW_RETURN_NOT_OK(strbuilder3.Append("male"));
+  ARROW_RETURN_NOT_OK(strbuilder3.Append("female"));
+  ARROW_RETURN_NOT_OK(strbuilder3.Append("female"));
+  std::shared_ptr<arrow::Array> strarray3;
+  ARROW_RETURN_NOT_OK(strbuilder3.Finish(&strarray3));
+
+  // schema
+  auto schema = arrow::schema({arrow::field("id", arrow::int64()),
+                               arrow::field("firstName", arrow::utf8()),
+                               arrow::field("lastName", arrow::utf8()),
+                               arrow::field("gender", arrow::utf8())});
+  return arrow::Table::Make(schema, {i64array, strarray, strarray2, strarray3});
+}
+
+// Builds an in-memory Arrow table of four edges using the GAR source and
+// destination index columns; returns the Arrow error on failure.
+arrow::Result<std::shared_ptr<arrow::Table>> generate_adj_list_table() {
+  // source vertex id
+  arrow::Int64Builder i64builder;
+  ARROW_RETURN_NOT_OK(i64builder.AppendValues({1, 0, 0, 2}));
+  std::shared_ptr<arrow::Array> i64array;
+  ARROW_RETURN_NOT_OK(i64builder.Finish(&i64array));
+
+  // destination vertex id
+  arrow::Int64Builder i64builder2;
+  ARROW_RETURN_NOT_OK(i64builder2.AppendValues({0, 1, 2, 1}));
+  std::shared_ptr<arrow::Array> i64array2;
+  ARROW_RETURN_NOT_OK(i64builder2.Finish(&i64array2));
+
+  // schema
+  auto schema = arrow::schema(
+      {arrow::field(GAR_NAMESPACE::GeneralParams::kSrcIndexCol, arrow::int64()),
+       arrow::field(GAR_NAMESPACE::GeneralParams::kDstIndexCol,
+                    arrow::int64())});
+  return arrow::Table::Make(schema, {i64array, i64array2});
+}
+
+// Builds an in-memory Arrow table holding the "creationDate" property of four
+// edges; returns the Arrow error on failure.
+arrow::Result<std::shared_ptr<arrow::Table>> generate_edge_property_table() {
+  // property "creationDate"
+  arrow::StringBuilder strbuilder;
+  ARROW_RETURN_NOT_OK(strbuilder.Append("2010-01-01"));
+  ARROW_RETURN_NOT_OK(strbuilder.Append("2011-01-01"));
+  ARROW_RETURN_NOT_OK(strbuilder.Append("2012-01-01"));
+  ARROW_RETURN_NOT_OK(strbuilder.Append("2013-01-01"));
+  std::shared_ptr<arrow::Array> strarray;
+  ARROW_RETURN_NOT_OK(strbuilder.Finish(&strarray));
+
+  // schema
+  auto schema = arrow::schema({arrow::field("creationDate", arrow::utf8())});
+  return arrow::Table::Make(schema, {strarray});
+}
+
+// Writes the generated vertex table and the vertex count under /tmp/ with a
+// VertexPropertyWriter, then reads the written vertex_count file back and
+// prints it.
+void vertex_property_writer(const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct writer
+  std::string vertex_meta_file =
+      TEST_DATA_DIR + "/ldbc_sample/parquet/" + "person.vertex.yml";
+  auto vertex_meta = GAR_NAMESPACE::Yaml::LoadFile(vertex_meta_file).value();
+  auto vertex_info = GAR_NAMESPACE::VertexInfo::Load(vertex_meta).value();
+  ASSERT(vertex_info.GetLabel() == "person");
+  GAR_NAMESPACE::VertexPropertyWriter writer(vertex_info, "/tmp/");
+
+  // construct vertex property table
+  auto table = generate_vertex_table().ValueOrDie();
+  // print
+  std::cout << "rows number of vertex table: " << table->num_rows()
+            << std::endl;
+  std::cout << "schema of vertex table: " << std::endl
+            << table->schema()->ToString() << std::endl;
+
+  // use writer
+  // set validate level
+  writer.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate);
+  // write the table
+  ASSERT(writer.WriteTable(table, 0).ok());
+  // write the number of vertices
+  ASSERT(writer.WriteVerticesNum(table->num_rows()).ok());
+  std::cout << "writing vertex data successfully!" << std::endl;
+  // check vertex count
+  auto path = "/tmp/vertex/person/vertex_count";
+  auto fs = arrow::fs::FileSystemFromUriOrPath(path).ValueOrDie();
+  auto input = fs->OpenInputStream(path).ValueOrDie();
+  auto num = input->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie();
+  const GAR_NAMESPACE::IdType* ptr =
+      reinterpret_cast<const GAR_NAMESPACE::IdType*>(num->data());
+  std::cout << "vertex count from reading written file: " << *ptr << std::endl;
+}
+
+// Writes the generated edge property chunk, adjacency list and edge/vertex
+// counts under /tmp/ with an EdgeChunkWriter (ordered by source), then reads
+// the written edge_count0 file back and prints it.
+void edge_chunk_writer(const GAR_NAMESPACE::GraphInfo& graph_info) {
+  // construct writer
+  std::string edge_meta_file =
+      TEST_DATA_DIR + "/ldbc_sample/csv/" + "person_knows_person.edge.yml";
+  auto edge_meta = GAR_NAMESPACE::Yaml::LoadFile(edge_meta_file).value();
+  auto edge_info = GAR_NAMESPACE::EdgeInfo::Load(edge_meta).value();
+  auto adj_list_type = GAR_NAMESPACE::AdjListType::ordered_by_source;
+  GAR_NAMESPACE::EdgeChunkWriter writer(edge_info, "/tmp/", adj_list_type);
+
+  // construct property chunk
+  auto chunk = generate_edge_property_table().ValueOrDie();
+  // print
+  std::cout << "rows number of edge property chunk: " << chunk->num_rows()
+            << std::endl;
+  std::cout << "schema of edge property chunk: " << std::endl
+            << chunk->schema()->ToString() << std::endl;
+  // construct adj list table
+  auto table = generate_adj_list_table().ValueOrDie();
+  // print
+  std::cout << "rows number of adj list table: " << table->num_rows()
+            << std::endl;
+  std::cout << "schema of adj list table: " << std::endl
+            << table->schema()->ToString() << std::endl;
+
+  // use writer
+  // set validate level
+  writer.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate);
+  // write a property chunk
+  GAR_NAMESPACE::PropertyGroup pg =
+      edge_info.GetPropertyGroup("creationDate", adj_list_type).value();
+  ASSERT(writer.WritePropertyChunk(chunk, pg, 0, 0).ok());
+  // write adj list of vertex chunk 0 to files
+  ASSERT(writer.SortAndWriteAdjListTable(table, 0, 0).ok());
+  // write number of edges for vertex chunk 0
+  ASSERT(writer.WriteEdgesNum(0, table->num_rows()).ok());
+  // write number of vertices
+  ASSERT(writer.WriteVerticesNum(903).ok());
+  std::cout << "writing edge data successfully!" << std::endl;
+  // check the number of edges
+  auto path = "/tmp/edge/person_knows_person/ordered_by_source/edge_count0";
+  auto fs = arrow::fs::FileSystemFromUriOrPath(path).ValueOrDie();
+  std::shared_ptr<arrow::io::InputStream> input =
+      fs->OpenInputStream(path).ValueOrDie();
+  auto edge_num = input->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie();
+  const GAR_NAMESPACE::IdType* edge_num_ptr =
+      reinterpret_cast<const GAR_NAMESPACE::IdType*>(edge_num->data());
+  std::cout << "edge number from reading written file: " << *edge_num_ptr
+            << std::endl;
+}
+
+// Loads the ldbc_sample graph info from TEST_DATA_DIR and runs the vertex and
+// edge writer examples.
+int main(int argc, char* argv[]) {
+  // read file and construct graph info
+  std::string path =
+      TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
+  auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();
+
+  // vertex property writer
+  std::cout << "Vertex property writer" << std::endl;
+  std::cout << "----------------------" << std::endl;
+  vertex_property_writer(graph_info);
+  std::cout << std::endl;
+
+  // edge property writer
+  std::cout << "Edge property writer" << std::endl;
+  std::cout << "--------------------" << std::endl;
+  edge_chunk_writer(graph_info);
+}