Skip to content

Commit

Permalink
Merge branch 'main' into 141-single-header-yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
acezen authored Apr 21, 2023
2 parents 0969074 + 80cb7e7 commit 862b98e
Show file tree
Hide file tree
Showing 14 changed files with 154 additions and 62 deletions.
2 changes: 1 addition & 1 deletion cpp/examples/bfs_father_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ int main(int argc, char* argv[]) {
assert(new_edge_info.Save("/tmp/person_bfs_person.edge.yml").ok());
GAR_NAMESPACE::builder::EdgesBuilder edges_builder(
new_edge_info, "file:///tmp/",
GAR_NAMESPACE::AdjListType::ordered_by_source);
GAR_NAMESPACE::AdjListType::ordered_by_source, num_vertices);
for (int i = 0; i < num_vertices; i++) {
if (i == root || pre[i] == -1)
continue;
Expand Down
44 changes: 12 additions & 32 deletions cpp/include/gar/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,14 +638,9 @@ class EdgesCollection<AdjListType::ordered_by_source> {
IdType vertex_chunk_begin = 0,
IdType vertex_chunk_end = std::numeric_limits<int64_t>::max())
: edge_info_(edge_info), prefix_(prefix) {
std::string base_dir;
GAR_ASSIGN_OR_RAISE_ERROR(auto fs,
FileSystemFromUriOrPath(prefix, &base_dir));
GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix,
edge_info.GetAdjListPathPrefix(adj_list_type_));
base_dir += adj_list_path_prefix;
GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num,
fs->GetFileNumOfDir(base_dir));
GAR_ASSIGN_OR_RAISE_ERROR(
auto vertex_chunk_num,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
if (vertex_chunk_end == std::numeric_limits<int64_t>::max()) {
vertex_chunk_end = vertex_chunk_num;
Expand Down Expand Up @@ -796,14 +791,9 @@ class EdgesCollection<AdjListType::ordered_by_dest> {
IdType vertex_chunk_begin = 0,
IdType vertex_chunk_end = std::numeric_limits<int64_t>::max())
: edge_info_(edge_info), prefix_(prefix) {
std::string base_dir;
GAR_ASSIGN_OR_RAISE_ERROR(auto fs,
FileSystemFromUriOrPath(prefix, &base_dir));
GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix,
edge_info.GetAdjListPathPrefix(adj_list_type_));
base_dir += adj_list_path_prefix;
GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num,
fs->GetFileNumOfDir(base_dir));
GAR_ASSIGN_OR_RAISE_ERROR(
auto vertex_chunk_num,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
if (vertex_chunk_end == std::numeric_limits<int64_t>::max()) {
vertex_chunk_end = vertex_chunk_num;
Expand Down Expand Up @@ -954,14 +944,9 @@ class EdgesCollection<AdjListType::unordered_by_source> {
IdType vertex_chunk_begin = 0,
IdType vertex_chunk_end = std::numeric_limits<int64_t>::max())
: edge_info_(edge_info), prefix_(prefix) {
std::string base_dir;
GAR_ASSIGN_OR_RAISE_ERROR(auto fs,
FileSystemFromUriOrPath(prefix, &base_dir));
GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix,
edge_info.GetAdjListPathPrefix(adj_list_type_));
base_dir += adj_list_path_prefix;
GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num,
fs->GetFileNumOfDir(base_dir));
GAR_ASSIGN_OR_RAISE_ERROR(
auto vertex_chunk_num,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
if (vertex_chunk_end == std::numeric_limits<int64_t>::max()) {
vertex_chunk_end = vertex_chunk_num;
Expand Down Expand Up @@ -1085,14 +1070,9 @@ class EdgesCollection<AdjListType::unordered_by_dest> {
IdType vertex_chunk_begin = 0,
IdType vertex_chunk_end = std::numeric_limits<int64_t>::max())
: edge_info_(edge_info), prefix_(prefix) {
std::string base_dir;
GAR_ASSIGN_OR_RAISE_ERROR(auto fs,
FileSystemFromUriOrPath(prefix, &base_dir));
GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix,
edge_info.GetAdjListPathPrefix(adj_list_type_));
base_dir += adj_list_path_prefix;
GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num,
fs->GetFileNumOfDir(base_dir));
GAR_ASSIGN_OR_RAISE_ERROR(
auto vertex_chunk_num,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
if (vertex_chunk_end == std::numeric_limits<int64_t>::max()) {
vertex_chunk_end = vertex_chunk_num;
Expand Down
18 changes: 17 additions & 1 deletion cpp/include/gar/graph_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -743,9 +743,25 @@ class EdgeInfo {
}

/**
* Get the file path for the number of edgess.
* Get the file path for the number of vertices.
*
* @param adj_list_type The adjacency list type.
* @return A Result object containing the file path for the number of
* vertices, or a Status object indicating an error.
*/
inline Result<std::string> GetVerticesNumFilePath(
    AdjListType adj_list_type) const noexcept {
  // The path can only be built for adjacency list types that this edge info
  // contains; anything else is reported as a key error.
  if (ContainAdjList(adj_list_type)) {
    const auto& adj_list_prefix = adj_list2prefix_.at(adj_list_type);
    // <edge prefix><adj list prefix>vertex_count
    return prefix_ + adj_list_prefix + "vertex_count";
  }
  return Status::KeyError("The adj list type is not found in edge info.");
}

/**
* Get the file path for the number of edges.
*
* @param vertex_chunk_index the vertex chunk index
* @param adj_list_type The adjacency list type.
* @return A Result object containing the file path for the number of edges,
* or a Status object indicating an error.
*/
Expand Down
15 changes: 9 additions & 6 deletions cpp/include/gar/reader/arrow_chunk_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ class AdjListArrowChunkReader {
GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix,
edge_info.GetAdjListPathPrefix(adj_list_type));
base_dir_ = prefix_ + adj_list_path_prefix;
GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_,
fs_->GetFileNumOfDir(base_dir_));
GAR_ASSIGN_OR_RAISE_ERROR(
vertex_chunk_num_,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
GAR_ASSIGN_OR_RAISE_ERROR(
chunk_num_, utils::GetEdgeChunkNum(prefix_, edge_info_, adj_list_type_,
vertex_chunk_index_));
Expand Down Expand Up @@ -320,8 +321,9 @@ class AdjListOffsetArrowChunkReader {
base_dir_ = prefix_ + dir_path;
if (adj_list_type == AdjListType::ordered_by_source ||
adj_list_type == AdjListType::ordered_by_dest) {
GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_,
fs_->GetFileNumOfDir(base_dir_));
GAR_ASSIGN_OR_RAISE_ERROR(
vertex_chunk_num_,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
vertex_chunk_size_ = adj_list_type == AdjListType::ordered_by_source
? edge_info_.GetSrcChunkSize()
: edge_info_.GetDstChunkSize();
Expand Down Expand Up @@ -422,8 +424,9 @@ class AdjListPropertyArrowChunkReader {
auto pg_path_prefix,
edge_info.GetPropertyGroupPathPrefix(property_group, adj_list_type));
base_dir_ = prefix_ + pg_path_prefix;
GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_,
fs_->GetFileNumOfDir(base_dir_));
GAR_ASSIGN_OR_RAISE_ERROR(
vertex_chunk_num_,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
GAR_ASSIGN_OR_RAISE_ERROR(
chunk_num_, utils::GetEdgeChunkNum(prefix_, edge_info_, adj_list_type_,
vertex_chunk_index_));
Expand Down
10 changes: 6 additions & 4 deletions cpp/include/gar/reader/chunk_info_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,9 @@ class AdjListChunkInfoReader {
GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix,
edge_info.GetAdjListPathPrefix(adj_list_type));
base_dir_ = prefix_ + adj_list_path_prefix;
GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_,
fs_->GetFileNumOfDir(base_dir_));
GAR_ASSIGN_OR_RAISE_ERROR(
vertex_chunk_num_,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
GAR_ASSIGN_OR_RAISE_ERROR(
chunk_num_, utils::GetEdgeChunkNum(prefix_, edge_info_, adj_list_type_,
vertex_chunk_index_));
Expand Down Expand Up @@ -239,8 +240,9 @@ class AdjListPropertyChunkInfoReader {
auto pg_path_prefix,
edge_info.GetPropertyGroupPathPrefix(property_group, adj_list_type));
base_dir_ = prefix_ + pg_path_prefix;
GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_,
fs_->GetFileNumOfDir(base_dir_));
GAR_ASSIGN_OR_RAISE_ERROR(
vertex_chunk_num_,
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_));
GAR_ASSIGN_OR_RAISE_ERROR(
chunk_num_, utils::GetEdgeChunkNum(prefix_, edge_info_, adj_list_type_,
vertex_chunk_index_));
Expand Down
11 changes: 11 additions & 0 deletions cpp/include/gar/utils/reader_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ Result<std::pair<IdType, IdType>> GetAdjListOffsetOfVertex(
Result<IdType> GetVertexChunkNum(const std::string& prefix,
const VertexInfo& vertex_info) noexcept;

/**
 * @brief Read the number of vertices stored for a vertex type.
 *
 * The value is read from the vertex count file whose path is given by
 * vertex_info.GetVerticesNumFilePath(), resolved against the given prefix.
 *
 * @param prefix The URI or path prefix used to resolve the file system and
 * the base location of the data.
 * @param vertex_info The vertex info that provides the vertex count file path.
 * @return A Result object containing the number of vertices, or a Status
 * object indicating an error.
 */
Result<IdType> GetVertexNum(const std::string& prefix,
const VertexInfo& vertex_info) noexcept;

/**
 * @brief Compute the number of vertex chunks for an adjacency list of an
 * edge type.
 *
 * The vertex count is read from the file given by
 * edge_info.GetVerticesNumFilePath(adj_list_type) and divided, rounding up,
 * by the source chunk size (for ordered_by_source / unordered_by_source) or
 * the destination chunk size (for the other adjacency list types).
 *
 * @param prefix The URI or path prefix used to resolve the file system and
 * the base location of the data.
 * @param edge_info The edge info that provides the file path and chunk sizes.
 * @param adj_list_type The adjacency list type.
 * @return A Result object containing the number of vertex chunks, or a Status
 * object indicating an error.
 */
Result<IdType> GetVertexChunkNum(const std::string& prefix,
const EdgeInfo& edge_info,
AdjListType adj_list_type) noexcept;

/**
 * @brief Read the number of vertices stored for an adjacency list of an edge
 * type.
 *
 * The value is read from the vertex count file whose path is given by
 * edge_info.GetVerticesNumFilePath(adj_list_type), resolved against the given
 * prefix.
 *
 * @param prefix The URI or path prefix used to resolve the file system and
 * the base location of the data.
 * @param edge_info The edge info that provides the vertex count file path.
 * @param adj_list_type The adjacency list type.
 * @return A Result object containing the number of vertices, or a Status
 * object indicating an error.
 */
Result<IdType> GetVertexNum(const std::string& prefix,
const EdgeInfo& edge_info,
AdjListType adj_list_type) noexcept;

Result<IdType> GetEdgeChunkNum(const std::string& prefix,
const EdgeInfo& edge_info,
AdjListType adj_list_type,
Expand Down
8 changes: 8 additions & 0 deletions cpp/include/gar/writer/arrow_chunk_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,14 @@ class EdgeChunkWriter {
Status WriteEdgesNum(IdType vertex_chunk_index, const IdType& count) const
noexcept;

/**
* @brief Write the number of vertices into the file.
*
* @param count The number of vertices.
* @return Status: ok or error.
*/
Status WriteVerticesNum(const IdType& count) const noexcept;

/**
* @brief Copy a file as a offset chunk.
*
Expand Down
24 changes: 11 additions & 13 deletions cpp/include/gar/writer/edges_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,8 @@ class EdgesBuilder {
* @param adj_list_type The adj list type of the edges.
* @param num_vertices The total number of vertices for source or destination.
*/
explicit EdgesBuilder(
const EdgeInfo edge_info, const std::string& prefix,
AdjListType adj_list_type = AdjListType::unordered_by_source,
IdType num_vertices = -1)
explicit EdgesBuilder(const EdgeInfo edge_info, const std::string& prefix,
AdjListType adj_list_type, IdType num_vertices)
: edge_info_(edge_info),
prefix_(prefix),
adj_list_type_(adj_list_type),
Expand Down Expand Up @@ -253,15 +251,13 @@ class EdgesBuilder {
// construct the writer
EdgeChunkWriter writer(edge_info_, prefix_, adj_list_type_);
// construct empty edge collections for vertex chunks without edges
if (num_vertices_ != -1) {
IdType num_vertex_chunks =
(num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_;
for (IdType i = 0; i < num_vertex_chunks; i++)
if (edges_.find(i) == edges_.end()) {
std::vector<Edge> empty_chunk_edges;
edges_[i] = empty_chunk_edges;
}
}
IdType num_vertex_chunks =
(num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_;
for (IdType i = 0; i < num_vertex_chunks; i++)
if (edges_.find(i) == edges_.end()) {
std::vector<Edge> empty_chunk_edges;
edges_[i] = empty_chunk_edges;
}
// dump the offsets
if (adj_list_type_ == AdjListType::ordered_by_source ||
adj_list_type_ == AdjListType::ordered_by_dest) {
Expand All @@ -280,6 +276,8 @@ class EdgesBuilder {
writer.WriteOffsetChunk(offset_table, vertex_chunk_index));
}
}
// dump the vertex num
GAR_RETURN_NOT_OK(writer.WriteVerticesNum(num_vertices_));
// dump the edge nums
IdType vertex_chunk_num =
(num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_;
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/arrow_chunk_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,13 @@ Status EdgeChunkWriter::WriteEdgesNum(IdType vertex_chunk_index,
return fs_->WriteValueToFile<IdType>(count, path);
}

Status EdgeChunkWriter::WriteVerticesNum(const IdType& count) const noexcept {
  // Look up where the vertex count of this writer's adjacency list type is
  // kept; the path returned by the edge info is relative to the writer prefix.
  GAR_ASSIGN_OR_RAISE(auto vertex_count_suffix,
                      edge_info_.GetVerticesNumFilePath(adj_list_type_));
  std::string vertex_count_path = prefix_ + vertex_count_suffix;
  // Persist the count as a single IdType value.
  return fs_->WriteValueToFile<IdType>(count, vertex_count_path);
}

Status EdgeChunkWriter::WriteOffsetChunk(const std::string& file_name,
IdType vertex_chunk_index) const
noexcept {
Expand Down
45 changes: 45 additions & 0 deletions cpp/src/reader_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,51 @@ Result<IdType> GetVertexChunkNum(const std::string& prefix,
vertex_info.GetChunkSize();
}

Result<IdType> GetVertexNum(const std::string& prefix,
                            const VertexInfo& vertex_info) noexcept {
  // Split the prefix into a file system handle and the base location on it.
  std::string base_path;
  GAR_ASSIGN_OR_RAISE(auto file_system,
                      FileSystemFromUriOrPath(prefix, &base_path));
  // The vertex info knows the relative location of its vertex count file.
  GAR_ASSIGN_OR_RAISE(auto count_file_suffix,
                      vertex_info.GetVerticesNumFilePath());
  std::string count_file_path = base_path + count_file_suffix;
  // The file holds a single IdType value: the number of vertices.
  GAR_ASSIGN_OR_RAISE(auto num_vertices,
                      file_system->ReadFileToValue<IdType>(count_file_path));
  return num_vertices;
}

Result<IdType> GetVertexChunkNum(const std::string& prefix,
                                 const EdgeInfo& edge_info,
                                 AdjListType adj_list_type) noexcept {
  // Split the prefix into a file system handle and the base location on it.
  std::string base_path;
  GAR_ASSIGN_OR_RAISE(auto file_system,
                      FileSystemFromUriOrPath(prefix, &base_path));
  // Read the vertex count stored for this adjacency list type.
  GAR_ASSIGN_OR_RAISE(auto count_file_suffix,
                      edge_info.GetVerticesNumFilePath(adj_list_type));
  std::string count_file_path = base_path + count_file_suffix;
  GAR_ASSIGN_OR_RAISE(auto num_vertices,
                      file_system->ReadFileToValue<IdType>(count_file_path));
  // Source-keyed adjacency lists are chunked by the source chunk size; every
  // other adjacency list type uses the destination chunk size.
  const bool keyed_by_source =
      adj_list_type == AdjListType::ordered_by_source ||
      adj_list_type == AdjListType::unordered_by_source;
  const IdType vertices_per_chunk = keyed_by_source
                                        ? edge_info.GetSrcChunkSize()
                                        : edge_info.GetDstChunkSize();
  // Ceiling division: a partially filled last chunk still counts as a chunk.
  // NOTE(review): a chunk size of 0 would divide by zero here; presumably the
  // edge info guarantees a positive chunk size, confirm.
  return (num_vertices + vertices_per_chunk - 1) / vertices_per_chunk;
}

Result<IdType> GetVertexNum(const std::string& prefix,
                            const EdgeInfo& edge_info,
                            AdjListType adj_list_type) noexcept {
  // Split the prefix into a file system handle and the base location on it.
  std::string base_path;
  GAR_ASSIGN_OR_RAISE(auto file_system,
                      FileSystemFromUriOrPath(prefix, &base_path));
  // The edge info knows the relative location of the vertex count file for
  // the requested adjacency list type.
  GAR_ASSIGN_OR_RAISE(auto count_file_suffix,
                      edge_info.GetVerticesNumFilePath(adj_list_type));
  std::string count_file_path = base_path + count_file_suffix;
  // The file holds a single IdType value: the number of vertices.
  GAR_ASSIGN_OR_RAISE(auto num_vertices,
                      file_system->ReadFileToValue<IdType>(count_file_path));
  return num_vertices;
}

Result<IdType> GetEdgeChunkNum(const std::string& prefix,
const EdgeInfo& edge_info,
AdjListType adj_list_type,
Expand Down
18 changes: 15 additions & 3 deletions cpp/test/test_arrow_chunk_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,21 @@ TEST_CASE("test_edge_chunk_writer") {
fs->OpenInputStream(
"/tmp/edge/person_knows_person/ordered_by_source/edge_count0")
.ValueOrDie();
auto num = input2->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie();
GAR_NAMESPACE::IdType* ptr = (GAR_NAMESPACE::IdType*) num->data();
REQUIRE((*ptr) == table->num_rows());
auto edge_num = input2->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie();
GAR_NAMESPACE::IdType* edge_num_ptr =
(GAR_NAMESPACE::IdType*) edge_num->data();
REQUIRE((*edge_num_ptr) == table->num_rows());

// Write number of vertices
REQUIRE(writer.WriteVerticesNum(903).ok());
std::shared_ptr<arrow::io::InputStream> input3 =
fs->OpenInputStream(
"/tmp/edge/person_knows_person/ordered_by_source/vertex_count")
.ValueOrDie();
auto vertex_num = input3->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie();
GAR_NAMESPACE::IdType* vertex_num_ptr =
(GAR_NAMESPACE::IdType*) vertex_num->data();
REQUIRE((*vertex_num_ptr) == 903);

// Set validate level
REQUIRE(writer.GetValidateLevel() ==
Expand Down
9 changes: 9 additions & 0 deletions cpp/test/test_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -294,13 +294,22 @@ TEST_CASE("test_edge_info") {
REQUIRE(edge_info.GetEdgesNumFilePath(0, adj_list_type_not_exist)
.status()
.IsKeyError());
REQUIRE(edge_info.GetVerticesNumFilePath(adj_list_type_not_exist)
.status()
.IsKeyError());

// edge count file path
auto maybe_path = edge_info.GetEdgesNumFilePath(0, adj_list_type);
REQUIRE(!maybe_path.has_error());
REQUIRE(maybe_path.value() ==
edge_info.GetPrefix() + prefix_of_adj_list_type + "edge_count0");

// vertex count file path
auto maybe_path_2 = edge_info.GetVerticesNumFilePath(adj_list_type);
REQUIRE(!maybe_path_2.has_error());
REQUIRE(maybe_path_2.value() ==
edge_info.GetPrefix() + prefix_of_adj_list_type + "vertex_count");

// test save
std::string save_path(std::tmpnam(nullptr));
REQUIRE(edge_info.Save(save_path).ok());
Expand Down
3 changes: 2 additions & 1 deletion docs/user-guide/getting-started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ As the simplest cases, the first example below adds vertices to **VerticesBuilder

edge_info = ...
prefix = ...
GraphArchive::builder::EdgesBuilder builder(edge_info, prefix, GraphArchive::AdjListType::ordered_by_source);
vertices_num = ...
GraphArchive::builder::EdgesBuilder builder(edge_info, prefix, GraphArchive::AdjListType::ordered_by_source, vertices_num);

// add an edge (0 -> 3)
GraphArchive::builder::Edge e(0, 3);
Expand Down
2 changes: 1 addition & 1 deletion testing

0 comments on commit 862b98e

Please sign in to comment.