Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[C++] Add examples about how to use C++ reader/writer #252

Merged
merged 11 commits into from
Oct 19, 2023
Merged
130 changes: 130 additions & 0 deletions cpp/examples/high_level_reader_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/** Copyright 2022 Alibaba Group Holding Limited.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>

#include "arrow/api.h"
#include "arrow/filesystem/api.h"

#include "./config.h"
#include "gar/graph.h"

/**
 * Example: read vertices through the high-level vertices collection API.
 *
 * Constructs the collection for the "person" label, prints the first 10
 * vertices (once through the iterator, once through the dereferenced vertex),
 * then prints the last vertex and the vertex with internal id 100, and finally
 * checks that the number of iterated vertices equals the collection's size.
 *
 * @param graph_info graph metadata passed to ConstructVerticesCollection.
 */
void vertices_collection(const GAR_NAMESPACE::GraphInfo& graph_info) {
  // construct vertices collection
  std::string label = "person";
  auto maybe_vertices_collection =
      GAR_NAMESPACE::ConstructVerticesCollection(graph_info, label);
  ASSERT(!maybe_vertices_collection.has_error());
  auto vertices = maybe_vertices_collection.value();

  // use vertices collection
  // same unsigned counter type as edges_collection uses, so the final
  // comparison against size() does not mix signed and unsigned operands
  size_t count = 0;
  // iterate through vertices collection
  for (auto it = vertices->begin(); it != vertices->end(); ++it) {
    count++;
    // print the first 10 vertices, but keep iterating to count all of them
    if (count > 10) {
      continue;
    }
    // access data through iterator directly
    std::cout << it.id() << ", id=" << it.property<int64_t>("id").value()
              << ", firstName=" << it.property<std::string>("firstName").value()
              << "; ";
    // access data through vertex
    auto vertex = *it;
    std::cout << vertex.id()
              << ", id=" << vertex.property<int64_t>("id").value()
              << ", firstName="
              << vertex.property<std::string>("firstName").value() << std::endl;
  }

  // operator+ on the iterator: jump to the last vertex.
  // Guarded so an empty collection never evaluates begin() + (0 - 1).
  if (count > 0) {
    auto it_last = vertices->begin() + (count - 1);
    std::cout << "the last vertex: " << std::endl;
    std::cout << it_last.id()
              << ", id=" << it_last.property<int64_t>("id").value()
              << ", firstName="
              << it_last.property<std::string>("firstName").value()
              << std::endl;
  }

  // find the vertex with internal id = 100
  // NOTE(review): the guard assumes internal ids run contiguously from 0 to
  // count - 1 -- confirm against the library before relying on it.
  if (count > 100) {
    auto it_find = vertices->find(100);
    std::cout << "the vertex with internal id = 100: " << std::endl;
    std::cout << it_find.id()
              << ", id=" << it_find.property<int64_t>("id").value()
              << ", firstName="
              << it_find.property<std::string>("firstName").value()
              << std::endl;
  }

  // count
  ASSERT(count == vertices->size());
  std::cout << "vertex_count=" << count << std::endl;
}

/**
 * Example: read edges through the high-level edges collection API.
 *
 * Constructs the "person"-"knows"-"person" edges collection with the
 * ordered_by_source adjacency list type, prints the first 10 edges (once
 * through the iterator, once through the dereferenced edge), prints the edges
 * found for source id 100, and finally checks that the number of iterated
 * edges equals the collection's size.
 *
 * @param graph_info graph metadata passed to ConstructEdgesCollection.
 */
void edges_collection(const GAR_NAMESPACE::GraphInfo& graph_info) {
  // construct edges collection
  std::string src_label = "person", edge_label = "knows", dst_label = "person";
  auto expect = GAR_NAMESPACE::ConstructEdgesCollection(
      graph_info, src_label, edge_label, dst_label,
      GAR_NAMESPACE::AdjListType::ordered_by_source);
  ASSERT(!expect.has_error());
  auto edges = expect.value();

  // use edges collection
  auto begin = edges->begin();
  auto end = edges->end();
  size_t count = 0;
  // iterate through edges collection
  for (auto it = begin; it != end; ++it) {
    count++;
    // print the first 10 edges, but keep iterating to count all of them
    if (count > 10) {
      continue;
    }
    // access data through iterator directly
    std::cout << "src=" << it.source() << ", dst=" << it.destination() << "; ";
    // access data through edge
    auto edge = *it;
    std::cout << "src=" << edge.source() << ", dst=" << edge.destination()
              << ", creationDate="
              << edge.property<std::string>("creationDate").value()
              << std::endl;
  }

  // find the first edge with source = 100
  auto it_find = edges->find_src(100, begin);
  std::cout << "the edge with source = 100: " << std::endl;
  // Only read from the iterator when the search landed on a real edge.
  // NOTE(review): presumably find_src yields end() when no edge matches --
  // confirm against the GraphAr API reference.
  if (it_find != end) {
    do {
      std::cout << "src=" << it_find.source()
                << ", dst=" << it_find.destination() << ", creationDate="
                << it_find.property<std::string>("creationDate").value()
                << std::endl;
    } while (it_find.next_src());
  }

  // count
  ASSERT(count == edges->size());
  std::cout << "edge_count=" << count << std::endl;
}

int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path =
TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();

// vertices collection
std::cout << "Vertices collection" << std::endl;
std::cout << "-------------------" << std::endl;
vertices_collection(graph_info);
std::cout << std::endl;

// edges collection
std::cout << "Edges collection" << std::endl;
std::cout << "----------------" << std::endl;
edges_collection(graph_info);
}
149 changes: 149 additions & 0 deletions cpp/examples/high_level_writer_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/** Copyright 2022 Alibaba Group Holding Limited.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "arrow/api.h"
#include "arrow/csv/api.h"
#include "arrow/filesystem/api.h"
#include "parquet/arrow/reader.h"

#include "./config.h"
#include "gar/writer/edges_builder.h"
#include "gar/writer/vertices_builder.h"

/**
 * Example: build and dump vertices with the high-level VerticesBuilder.
 *
 * Loads the vertex info from person.vertex.yml, reads the '|'-separated
 * person CSV file (the first line holds the column names), adds one vertex
 * per data line, dumps the builder, and finally clears it.
 * The first column is stored as an int64_t property, the remaining columns
 * as strings.
 */
void vertices_builder() {
  // construct vertices builder
  std::string vertex_meta_file =
      TEST_DATA_DIR + "/ldbc_sample/parquet/" + "person.vertex.yml";
  auto maybe_vertex_meta = GAR_NAMESPACE::Yaml::LoadFile(vertex_meta_file);
  ASSERT(!maybe_vertex_meta.has_error());
  auto vertex_meta = maybe_vertex_meta.value();
  auto maybe_vertex_info = GAR_NAMESPACE::VertexInfo::Load(vertex_meta);
  ASSERT(!maybe_vertex_info.has_error());
  auto vertex_info = maybe_vertex_info.value();
  GAR_NAMESPACE::IdType start_index = 0;
  // "/tmp/" is passed as the builder's second argument -- presumably the
  // output prefix for the dumped files; confirm against the builder API.
  GAR_NAMESPACE::builder::VerticesBuilder builder(vertex_info, "/tmp/",
                                                  start_index);

  // set validate level
  builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate);

  // read data from a csv file
  std::ifstream fp(TEST_DATA_DIR + "/ldbc_sample/person_0_0.csv");
  // fail loudly instead of silently dumping zero vertices when the sample
  // file is missing
  ASSERT(fp.is_open());
  std::string line;
  std::getline(fp, line);
  // number of columns consumed from the header and from every data line
  const int m = 4;
  std::vector<std::string> names;
  std::istringstream header(line);
  for (int i = 0; i < m; i++) {
    std::string name;
    std::getline(header, name, '|');
    names.push_back(name);
  }

  // read data and add vertices
  while (std::getline(fp, line)) {
    // a blank line carries no vertex; skip it rather than parse empty fields
    if (line.empty()) {
      continue;
    }
    std::string val;
    std::istringstream fields(line);
    GAR_NAMESPACE::builder::Vertex v;
    for (int i = 0; i < m; i++) {
      std::getline(fields, val, '|');
      if (i == 0) {
        // std::stoll throws on a non-numeric field instead of producing a
        // garbage id; keep the stored value typed as int64_t
        int64_t x = std::stoll(val);
        v.AddProperty(names[i], x);
      } else {
        v.AddProperty(names[i], val);
      }
    }
    ASSERT(builder.AddVertex(v).ok());
  }

  // dump
  std::cout << "vertex_count=" << builder.GetNum() << std::endl;
  ASSERT(builder.Dump().ok());
  std::cout << "dump vertices collection successfully!" << std::endl;

  // clear vertices
  builder.Clear();
  ASSERT(builder.GetNum() == 0);
}

/**
 * Example: build and dump edges with the high-level EdgesBuilder.
 *
 * Loads the edge info from person_knows_person.edge.yml, reads the
 * '|'-separated edge CSV file (the first line is a header and is skipped),
 * and adds one edge per data line. The source and destination keys found in
 * the file are mapped to consecutive indices in order of first appearance;
 * the third column is stored as the "creationDate" property. The builder is
 * then dumped and cleared.
 */
void edges_builder() {
  // construct edges builder
  std::string edge_meta_file =
      TEST_DATA_DIR + "/ldbc_sample/parquet/" + "person_knows_person.edge.yml";
  auto maybe_edge_meta = GAR_NAMESPACE::Yaml::LoadFile(edge_meta_file);
  ASSERT(!maybe_edge_meta.has_error());
  auto edge_meta = maybe_edge_meta.value();
  auto maybe_edge_info = GAR_NAMESPACE::EdgeInfo::Load(edge_meta);
  ASSERT(!maybe_edge_info.has_error());
  auto edge_info = maybe_edge_info.value();
  // NOTE(review): presumably the number of person vertices in the sample
  // dataset -- confirm before reusing with other data.
  auto vertices_num = 903;
  GAR_NAMESPACE::builder::EdgesBuilder builder(
      edge_info, "/tmp/", GAR_NAMESPACE::AdjListType::ordered_by_dest,
      vertices_num);

  // set validate level
  builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate);

  // read data from a csv file
  std::ifstream fp(TEST_DATA_DIR + "/ldbc_sample/person_knows_person_0_0.csv");
  // fail loudly instead of silently dumping zero edges when the sample file
  // is missing
  ASSERT(fp.is_open());
  std::string line;
  // the first line is the header; it is read and discarded
  std::getline(fp, line);
  std::map<std::string, int64_t> mapping;
  int64_t cnt = 0;

  // Returns the index assigned to `key`, assigning the next free index the
  // first time a key is seen (single map lookup per call).
  auto index_of = [&mapping, &cnt](const std::string& key) {
    auto inserted = mapping.emplace(key, cnt);
    if (inserted.second) {
      ++cnt;
    }
    return inserted.first->second;
  };

  // read data and add edges
  while (std::getline(fp, line)) {
    // a blank line carries no edge; skip it rather than parse empty fields
    if (line.empty()) {
      continue;
    }
    std::istringstream fields(line);
    std::string src_key, dst_key, creation_date;
    std::getline(fields, src_key, '|');
    std::getline(fields, dst_key, '|');
    std::getline(fields, creation_date, '|');

    // resolve source before destination in separate statements so the
    // first-appearance numbering does not depend on argument evaluation order
    const int64_t s = index_of(src_key);
    const int64_t d = index_of(dst_key);

    GAR_NAMESPACE::builder::Edge e(s, d);
    e.AddProperty("creationDate", creation_date);
    ASSERT(builder.AddEdge(e).ok());
  }

  // dump
  std::cout << "edge_count=" << builder.GetNum() << std::endl;
  ASSERT(builder.Dump().ok());
  std::cout << "dump edges collection successfully!" << std::endl;

  // clear edges
  builder.Clear();
  ASSERT(builder.GetNum() == 0);
}

/**
 * Entry point of the high-level writer example: runs the vertices builder
 * demo followed by the edges builder demo, each under a printed heading.
 */
int main(int argc, char* argv[]) {
  // prints a section title and the rule line underneath it
  auto print_heading = [](const char* title, const char* rule) {
    std::cout << title << std::endl;
    std::cout << rule << std::endl;
  };

  // vertices builder
  print_heading("Vertices builder", "-------------------");
  vertices_builder();
  std::cout << std::endl;

  // edges builder
  print_heading("Edges builder", "----------------");
  edges_builder();
}
Loading