diff --git a/cpp/examples/bfs_father_example.cc b/cpp/examples/bfs_father_example.cc index 6fd329ac4..e73babd18 100644 --- a/cpp/examples/bfs_father_example.cc +++ b/cpp/examples/bfs_father_example.cc @@ -155,7 +155,7 @@ int main(int argc, char* argv[]) { assert(new_edge_info.Save("/tmp/person_bfs_person.edge.yml").ok()); GAR_NAMESPACE::builder::EdgesBuilder edges_builder( new_edge_info, "file:///tmp/", - GAR_NAMESPACE::AdjListType::ordered_by_source); + GAR_NAMESPACE::AdjListType::ordered_by_source, num_vertices); for (int i = 0; i < num_vertices; i++) { if (i == root || pre[i] == -1) continue; diff --git a/cpp/include/gar/graph.h b/cpp/include/gar/graph.h index 1512e1c69..ac3760192 100644 --- a/cpp/include/gar/graph.h +++ b/cpp/include/gar/graph.h @@ -638,14 +638,9 @@ class EdgesCollection { IdType vertex_chunk_begin = 0, IdType vertex_chunk_end = std::numeric_limits::max()) : edge_info_(edge_info), prefix_(prefix) { - std::string base_dir; - GAR_ASSIGN_OR_RAISE_ERROR(auto fs, - FileSystemFromUriOrPath(prefix, &base_dir)); - GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix, - edge_info.GetAdjListPathPrefix(adj_list_type_)); - base_dir += adj_list_path_prefix; - GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num, - fs->GetFileNumOfDir(base_dir)); + GAR_ASSIGN_OR_RAISE_ERROR( + auto vertex_chunk_num, + utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); std::vector edge_chunk_nums(vertex_chunk_num, 0); if (vertex_chunk_end == std::numeric_limits::max()) { vertex_chunk_end = vertex_chunk_num; @@ -796,14 +791,9 @@ class EdgesCollection { IdType vertex_chunk_begin = 0, IdType vertex_chunk_end = std::numeric_limits::max()) : edge_info_(edge_info), prefix_(prefix) { - std::string base_dir; - GAR_ASSIGN_OR_RAISE_ERROR(auto fs, - FileSystemFromUriOrPath(prefix, &base_dir)); - GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix, - edge_info.GetAdjListPathPrefix(adj_list_type_)); - base_dir += adj_list_path_prefix; - GAR_ASSIGN_OR_RAISE_ERROR(auto 
vertex_chunk_num, - fs->GetFileNumOfDir(base_dir)); + GAR_ASSIGN_OR_RAISE_ERROR( + auto vertex_chunk_num, + utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); std::vector edge_chunk_nums(vertex_chunk_num, 0); if (vertex_chunk_end == std::numeric_limits::max()) { vertex_chunk_end = vertex_chunk_num; @@ -954,14 +944,9 @@ class EdgesCollection { IdType vertex_chunk_begin = 0, IdType vertex_chunk_end = std::numeric_limits::max()) : edge_info_(edge_info), prefix_(prefix) { - std::string base_dir; - GAR_ASSIGN_OR_RAISE_ERROR(auto fs, - FileSystemFromUriOrPath(prefix, &base_dir)); - GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix, - edge_info.GetAdjListPathPrefix(adj_list_type_)); - base_dir += adj_list_path_prefix; - GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num, - fs->GetFileNumOfDir(base_dir)); + GAR_ASSIGN_OR_RAISE_ERROR( + auto vertex_chunk_num, + utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); std::vector edge_chunk_nums(vertex_chunk_num, 0); if (vertex_chunk_end == std::numeric_limits::max()) { vertex_chunk_end = vertex_chunk_num; @@ -1085,14 +1070,9 @@ class EdgesCollection { IdType vertex_chunk_begin = 0, IdType vertex_chunk_end = std::numeric_limits::max()) : edge_info_(edge_info), prefix_(prefix) { - std::string base_dir; - GAR_ASSIGN_OR_RAISE_ERROR(auto fs, - FileSystemFromUriOrPath(prefix, &base_dir)); - GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix, - edge_info.GetAdjListPathPrefix(adj_list_type_)); - base_dir += adj_list_path_prefix; - GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num, - fs->GetFileNumOfDir(base_dir)); + GAR_ASSIGN_OR_RAISE_ERROR( + auto vertex_chunk_num, + utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); std::vector edge_chunk_nums(vertex_chunk_num, 0); if (vertex_chunk_end == std::numeric_limits::max()) { vertex_chunk_end = vertex_chunk_num; diff --git a/cpp/include/gar/graph_info.h b/cpp/include/gar/graph_info.h index c6e1d7b7d..ab5af1183 100644 --- a/cpp/include/gar/graph_info.h +++ 
b/cpp/include/gar/graph_info.h @@ -743,9 +743,25 @@ class EdgeInfo { } /** - * Get the file path for the number of edgess. + * Get the file path for the number of vertices. + * + * @param adj_list_type The adjacency list type. + * @return A Result object containing the file path for the number of vertices, + * or a Status object indicating an error. + */ + inline Result GetVerticesNumFilePath( + AdjListType adj_list_type) const noexcept { + if (!ContainAdjList(adj_list_type)) { + return Status::KeyError("The adj list type is not found in edge info."); + } + return prefix_ + adj_list2prefix_.at(adj_list_type) + "vertex_count"; + } + + /** + * Get the file path for the number of edges. * * @param vertex_chunk_index the vertex chunk index + * @param adj_list_type The adjacency list type. * @return A Result object containing the file path for the number of edges, * or a Status object indicating an error. */ diff --git a/cpp/include/gar/reader/arrow_chunk_reader.h b/cpp/include/gar/reader/arrow_chunk_reader.h index 1e3c9ad8e..6a243e481 100644 --- a/cpp/include/gar/reader/arrow_chunk_reader.h +++ b/cpp/include/gar/reader/arrow_chunk_reader.h @@ -160,8 +160,9 @@ class AdjListArrowChunkReader { GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix, edge_info.GetAdjListPathPrefix(adj_list_type)); base_dir_ = prefix_ + adj_list_path_prefix; - GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_, - fs_->GetFileNumOfDir(base_dir_)); + GAR_ASSIGN_OR_RAISE_ERROR( + vertex_chunk_num_, + utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); GAR_ASSIGN_OR_RAISE_ERROR( chunk_num_, utils::GetEdgeChunkNum(prefix_, edge_info_, adj_list_type_, vertex_chunk_index_)); @@ -320,8 +321,9 @@ class AdjListOffsetArrowChunkReader { base_dir_ = prefix_ + dir_path; if (adj_list_type == AdjListType::ordered_by_source || adj_list_type == AdjListType::ordered_by_dest) { - GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_, - fs_->GetFileNumOfDir(base_dir_)); + GAR_ASSIGN_OR_RAISE_ERROR( + vertex_chunk_num_, + 
utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); vertex_chunk_size_ = adj_list_type == AdjListType::ordered_by_source ? edge_info_.GetSrcChunkSize() : edge_info_.GetDstChunkSize(); @@ -422,8 +424,9 @@ class AdjListPropertyArrowChunkReader { auto pg_path_prefix, edge_info.GetPropertyGroupPathPrefix(property_group, adj_list_type)); base_dir_ = prefix_ + pg_path_prefix; - GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_, - fs_->GetFileNumOfDir(base_dir_)); + GAR_ASSIGN_OR_RAISE_ERROR( + vertex_chunk_num_, + utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); GAR_ASSIGN_OR_RAISE_ERROR( chunk_num_, utils::GetEdgeChunkNum(prefix_, edge_info_, adj_list_type_, vertex_chunk_index_)); diff --git a/cpp/include/gar/reader/chunk_info_reader.h b/cpp/include/gar/reader/chunk_info_reader.h index 9416e3493..4d4fd856d 100644 --- a/cpp/include/gar/reader/chunk_info_reader.h +++ b/cpp/include/gar/reader/chunk_info_reader.h @@ -135,8 +135,9 @@ class AdjListChunkInfoReader { GAR_ASSIGN_OR_RAISE_ERROR(auto adj_list_path_prefix, edge_info.GetAdjListPathPrefix(adj_list_type)); base_dir_ = prefix_ + adj_list_path_prefix; - GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_, - fs_->GetFileNumOfDir(base_dir_)); + GAR_ASSIGN_OR_RAISE_ERROR( + vertex_chunk_num_, + utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); GAR_ASSIGN_OR_RAISE_ERROR( chunk_num_, utils::GetEdgeChunkNum(prefix_, edge_info_, adj_list_type_, vertex_chunk_index_)); @@ -239,8 +240,9 @@ class AdjListPropertyChunkInfoReader { auto pg_path_prefix, edge_info.GetPropertyGroupPathPrefix(property_group, adj_list_type)); base_dir_ = prefix_ + pg_path_prefix; - GAR_ASSIGN_OR_RAISE_ERROR(vertex_chunk_num_, - fs_->GetFileNumOfDir(base_dir_)); + GAR_ASSIGN_OR_RAISE_ERROR( + vertex_chunk_num_, + utils::GetVertexChunkNum(prefix_, edge_info_, adj_list_type_)); GAR_ASSIGN_OR_RAISE_ERROR( chunk_num_, utils::GetEdgeChunkNum(prefix_, edge_info_, adj_list_type_, vertex_chunk_index_)); diff --git 
a/cpp/include/gar/utils/reader_utils.h b/cpp/include/gar/utils/reader_utils.h index 6251cdb85..c63f36011 100644 --- a/cpp/include/gar/utils/reader_utils.h +++ b/cpp/include/gar/utils/reader_utils.h @@ -32,6 +32,17 @@ Result> GetAdjListOffsetOfVertex( Result GetVertexChunkNum(const std::string& prefix, const VertexInfo& vertex_info) noexcept; +Result GetVertexNum(const std::string& prefix, + const VertexInfo& vertex_info) noexcept; + +Result GetVertexChunkNum(const std::string& prefix, + const EdgeInfo& edge_info, + AdjListType adj_list_type) noexcept; + +Result GetVertexNum(const std::string& prefix, + const EdgeInfo& edge_info, + AdjListType adj_list_type) noexcept; + Result GetEdgeChunkNum(const std::string& prefix, const EdgeInfo& edge_info, AdjListType adj_list_type, diff --git a/cpp/include/gar/writer/arrow_chunk_writer.h b/cpp/include/gar/writer/arrow_chunk_writer.h index 5eec35093..cfc883e31 100644 --- a/cpp/include/gar/writer/arrow_chunk_writer.h +++ b/cpp/include/gar/writer/arrow_chunk_writer.h @@ -284,6 +284,14 @@ class EdgeChunkWriter { Status WriteEdgesNum(IdType vertex_chunk_index, const IdType& count) const noexcept; + /** + * @brief Write the number of vertices into the file. + * + * @param count The number of vertices. + * @return Status: ok or error. + */ + Status WriteVerticesNum(const IdType& count) const noexcept; + /** * @brief Copy a file as a offset chunk. * diff --git a/cpp/include/gar/writer/edges_builder.h b/cpp/include/gar/writer/edges_builder.h index 8a140a58b..ff20f156d 100644 --- a/cpp/include/gar/writer/edges_builder.h +++ b/cpp/include/gar/writer/edges_builder.h @@ -148,10 +148,8 @@ class EdgesBuilder { * @param adj_list_type The adj list type of the edges. * @param num_vertices The total number of vertices for source or destination. 
*/ - explicit EdgesBuilder( - const EdgeInfo edge_info, const std::string& prefix, - AdjListType adj_list_type = AdjListType::unordered_by_source, - IdType num_vertices = -1) + explicit EdgesBuilder(const EdgeInfo edge_info, const std::string& prefix, + AdjListType adj_list_type, IdType num_vertices) : edge_info_(edge_info), prefix_(prefix), adj_list_type_(adj_list_type), @@ -253,15 +251,13 @@ class EdgesBuilder { // construct the writer EdgeChunkWriter writer(edge_info_, prefix_, adj_list_type_); // construct empty edge collections for vertex chunks without edges - if (num_vertices_ != -1) { - IdType num_vertex_chunks = - (num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_; - for (IdType i = 0; i < num_vertex_chunks; i++) - if (edges_.find(i) == edges_.end()) { - std::vector empty_chunk_edges; - edges_[i] = empty_chunk_edges; - } - } + IdType num_vertex_chunks = + (num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_; + for (IdType i = 0; i < num_vertex_chunks; i++) + if (edges_.find(i) == edges_.end()) { + std::vector empty_chunk_edges; + edges_[i] = empty_chunk_edges; + } // dump the offsets if (adj_list_type_ == AdjListType::ordered_by_source || adj_list_type_ == AdjListType::ordered_by_dest) { @@ -280,6 +276,8 @@ class EdgesBuilder { writer.WriteOffsetChunk(offset_table, vertex_chunk_index)); } } + // dump the vertex num + GAR_RETURN_NOT_OK(writer.WriteVerticesNum(num_vertices_)); // dump the edge nums IdType vertex_chunk_num = (num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_; diff --git a/cpp/src/arrow_chunk_writer.cc b/cpp/src/arrow_chunk_writer.cc index 067ed3902..0facfe1c1 100644 --- a/cpp/src/arrow_chunk_writer.cc +++ b/cpp/src/arrow_chunk_writer.cc @@ -345,6 +345,13 @@ Status EdgeChunkWriter::WriteEdgesNum(IdType vertex_chunk_index, return fs_->WriteValueToFile(count, path); } +Status EdgeChunkWriter::WriteVerticesNum(const IdType& count) const noexcept { + GAR_ASSIGN_OR_RAISE(auto suffix, + 
edge_info_.GetVerticesNumFilePath(adj_list_type_)); + std::string path = prefix_ + suffix; + return fs_->WriteValueToFile(count, path); +} + Status EdgeChunkWriter::WriteOffsetChunk(const std::string& file_name, IdType vertex_chunk_index) const noexcept { diff --git a/cpp/src/reader_utils.cc b/cpp/src/reader_utils.cc index 9789276cf..d696f4998 100644 --- a/cpp/src/reader_utils.cc +++ b/cpp/src/reader_utils.cc @@ -80,6 +80,51 @@ Result GetVertexChunkNum(const std::string& prefix, vertex_info.GetChunkSize(); } +Result GetVertexNum(const std::string& prefix, + const VertexInfo& vertex_info) noexcept { + std::string out_prefix; + GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix)); + GAR_ASSIGN_OR_RAISE(auto vertex_num_file_suffix, + vertex_info.GetVerticesNumFilePath()); + std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix; + GAR_ASSIGN_OR_RAISE(auto vertex_num, + fs->ReadFileToValue(vertex_num_file_path)); + return vertex_num; +} + +Result GetVertexChunkNum(const std::string& prefix, + const EdgeInfo& edge_info, + AdjListType adj_list_type) noexcept { + std::string out_prefix; + GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix)); + GAR_ASSIGN_OR_RAISE(auto vertex_num_file_suffix, + edge_info.GetVerticesNumFilePath(adj_list_type)); + std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix; + GAR_ASSIGN_OR_RAISE(auto vertex_num, + fs->ReadFileToValue(vertex_num_file_path)); + IdType chunk_size; + if (adj_list_type == AdjListType::ordered_by_source || + adj_list_type == AdjListType::unordered_by_source) { + chunk_size = edge_info.GetSrcChunkSize(); + } else { + chunk_size = edge_info.GetDstChunkSize(); + } + return (vertex_num + chunk_size - 1) / chunk_size; +} + +Result GetVertexNum(const std::string& prefix, + const EdgeInfo& edge_info, + AdjListType adj_list_type) noexcept { + std::string out_prefix; + GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix)); + 
GAR_ASSIGN_OR_RAISE(auto vertex_num_file_suffix, + edge_info.GetVerticesNumFilePath(adj_list_type)); + std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix; + GAR_ASSIGN_OR_RAISE(auto vertex_num, + fs->ReadFileToValue(vertex_num_file_path)); + return vertex_num; +} + Result GetEdgeChunkNum(const std::string& prefix, const EdgeInfo& edge_info, AdjListType adj_list_type, diff --git a/cpp/test/test_arrow_chunk_writer.cc b/cpp/test/test_arrow_chunk_writer.cc index 571954d5b..5c3113a31 100644 --- a/cpp/test/test_arrow_chunk_writer.cc +++ b/cpp/test/test_arrow_chunk_writer.cc @@ -193,9 +193,21 @@ TEST_CASE("test_edge_chunk_writer") { fs->OpenInputStream( "/tmp/edge/person_knows_person/ordered_by_source/edge_count0") .ValueOrDie(); - auto num = input2->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie(); - GAR_NAMESPACE::IdType* ptr = (GAR_NAMESPACE::IdType*) num->data(); - REQUIRE((*ptr) == table->num_rows()); + auto edge_num = input2->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie(); + GAR_NAMESPACE::IdType* edge_num_ptr = + (GAR_NAMESPACE::IdType*) edge_num->data(); + REQUIRE((*edge_num_ptr) == table->num_rows()); + + // Write number of vertices + REQUIRE(writer.WriteVerticesNum(903).ok()); + std::shared_ptr input3 = + fs->OpenInputStream( + "/tmp/edge/person_knows_person/ordered_by_source/vertex_count") + .ValueOrDie(); + auto vertex_num = input3->Read(sizeof(GAR_NAMESPACE::IdType)).ValueOrDie(); + GAR_NAMESPACE::IdType* vertex_num_ptr = + (GAR_NAMESPACE::IdType*) vertex_num->data(); + REQUIRE((*vertex_num_ptr) == 903); // Set validate level REQUIRE(writer.GetValidateLevel() == diff --git a/cpp/test/test_info.cc b/cpp/test/test_info.cc index 046d73b41..aebc31353 100644 --- a/cpp/test/test_info.cc +++ b/cpp/test/test_info.cc @@ -294,6 +294,9 @@ TEST_CASE("test_edge_info") { REQUIRE(edge_info.GetEdgesNumFilePath(0, adj_list_type_not_exist) .status() .IsKeyError()); + REQUIRE(edge_info.GetVerticesNumFilePath(adj_list_type_not_exist) + .status() + 
.IsKeyError()); // edge count file path auto maybe_path = edge_info.GetEdgesNumFilePath(0, adj_list_type); @@ -301,6 +304,12 @@ TEST_CASE("test_edge_info") { REQUIRE(maybe_path.value() == edge_info.GetPrefix() + prefix_of_adj_list_type + "edge_count0"); + // vertex count file path + auto maybe_path_2 = edge_info.GetVerticesNumFilePath(adj_list_type); + REQUIRE(!maybe_path_2.has_error()); + REQUIRE(maybe_path_2.value() == + edge_info.GetPrefix() + prefix_of_adj_list_type + "vertex_count"); + // test save std::string save_path(std::tmpnam(nullptr)); REQUIRE(edge_info.Save(save_path).ok()); diff --git a/docs/user-guide/getting-started.rst b/docs/user-guide/getting-started.rst index 2d338a90a..068539c1a 100644 --- a/docs/user-guide/getting-started.rst +++ b/docs/user-guide/getting-started.rst @@ -156,7 +156,8 @@ As the simplest cases, the fist example below adds vertices to **VerticesBuilder edge_info = ... prefix = ... - GraphArchive::builder::EdgesBuilder builder(edge_info, prefix, GraphArchive::AdjListType::ordered_by_source); + vertices_num = ... + GraphArchive::builder::EdgesBuilder builder(edge_info, prefix, GraphArchive::AdjListType::ordered_by_source, vertices_num); // add an edge (0 -> 3) GraphArchive::builder::Edge e(0, 3); diff --git a/testing b/testing index da043b706..78b39a74f 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit da043b7064487fbcd97e9b2549956c49ab97c129 +Subproject commit 78b39a74fc42c0eb8b47cc0c4f38745fa394700f