From 4d092b38f12e8df4d55003bcbc625e3fd979de5a Mon Sep 17 00:00:00 2001 From: Weibin Zeng Date: Sun, 4 Feb 2024 17:39:46 +0800 Subject: [PATCH] [Feat][Format][C++] Support extra info in graph info (#356) * [Feat][Format][C++] Support extra info in graph info --- cpp/include/gar/fwd.h | 4 ++- cpp/include/gar/graph_info.h | 18 ++++++++++--- cpp/src/graph_info.cc | 51 ++++++++++++++++++++++++++++-------- cpp/test/test_info.cc | 33 +++++++++++++++++++++-- docs/file-format.rst | 3 ++- 5 files changed, 90 insertions(+), 19 deletions(-) diff --git a/cpp/include/gar/fwd.h b/cpp/include/gar/fwd.h index da05f2b9d..ffaf707e5 100644 --- a/cpp/include/gar/fwd.h +++ b/cpp/include/gar/fwd.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "gar/external/result.hpp" @@ -115,7 +116,8 @@ std::shared_ptr CreateEdgeInfo( std::shared_ptr CreateGraphInfo( const std::string& name, const VertexInfoVector& vertex_infos, const EdgeInfoVector& edge_infos, const std::string& prefix, - std::shared_ptr version = nullptr); + std::shared_ptr version = nullptr, + const std::unordered_map& extra_info = {}); const std::shared_ptr& boolean(); const std::shared_ptr& int32(); diff --git a/cpp/include/gar/graph_info.h b/cpp/include/gar/graph_info.h index 1d6674562..0948446a7 100644 --- a/cpp/include/gar/graph_info.h +++ b/cpp/include/gar/graph_info.h @@ -19,6 +19,7 @@ #include #include +#include #include #include "gar/fwd.h" @@ -673,11 +674,13 @@ class GraphInfo { * @param prefix The absolute path prefix to store chunk files of the graph. * Defaults to "./". * @param version The version of the graph info. + * @param extra_info The extra metadata of the graph info. */ - explicit GraphInfo(const std::string& graph_name, - VertexInfoVector vertex_infos, EdgeInfoVector edge_infos, - const std::string& prefix = "./", - std::shared_ptr version = nullptr); + explicit GraphInfo( + const std::string& graph_name, VertexInfoVector vertex_infos, + EdgeInfoVector edge_infos, const std::string& prefix = "./", + std::shared_ptr version = nullptr, + const std::unordered_map& extra_info = {}); /** * @brief Loads the input file as a `GraphInfo` instance. @@ -738,6 +741,13 @@ class GraphInfo { */ const std::shared_ptr& version() const; + /** + * @brief Get the extra metadata of the graph info object. + * + * @return The extra metadata of the graph info object. + */ + const std::unordered_map& GetExtraInfo() const; + /** * @brief Get the vertex info with the given label. * @param label The label of the vertex. diff --git a/cpp/src/graph_info.cc b/cpp/src/graph_info.cc index 63d83cf83..69f4d557d 100644 --- a/cpp/src/graph_info.cc +++ b/cpp/src/graph_info.cc @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include "yaml/Yaml.hpp" @@ -956,6 +955,16 @@ static Result> ConstructGraphInfo( version, InfoVersion::Parse( graph_meta->operator[]("version").As())); } + std::unordered_map extra_info; + if (!graph_meta->operator[]("extra_info").IsNone()) { + auto& extra_info_node = graph_meta->operator[]("extra_info"); + for (auto it = extra_info_node.Begin(); it != extra_info_node.End(); it++) { + auto node = (*it).second; + auto key = node["key"].As(); + auto value = node["value"].As(); + extra_info.emplace(key, value); + } + } VertexInfoVector vertex_infos; EdgeInfoVector edge_infos; @@ -983,7 +992,7 @@ static Result> ConstructGraphInfo( } } return std::make_shared(name, vertex_infos, edge_infos, prefix, - version); + version, extra_info); } } // namespace @@ -992,12 +1001,14 @@ class GraphInfo::Impl { public: Impl(const std::string& graph_name, VertexInfoVector vertex_infos, EdgeInfoVector edge_infos, const std::string& prefix, - std::shared_ptr version) + std::shared_ptr version, + const std::unordered_map& extra_info) : name_(graph_name), vertex_infos_(std::move(vertex_infos)), edge_infos_(std::move(edge_infos)), prefix_(prefix), - version_(std::move(version)) { + version_(std::move(version)), + extra_info_(extra_info) { for (int i = 0; i < vertex_infos_.size(); i++) { vlabel_to_index_[vertex_infos_[i]->GetLabel()] = i; } @@ -1035,16 +1046,18 @@ class GraphInfo::Impl { EdgeInfoVector edge_infos_; std::string prefix_; std::shared_ptr version_; + std::unordered_map extra_info_; std::unordered_map vlabel_to_index_; std::unordered_map elabel_to_index_; }; -GraphInfo::GraphInfo(const std::string& graph_name, - VertexInfoVector vertex_infos, EdgeInfoVector edge_infos, - const std::string& prefix, - std::shared_ptr version) +GraphInfo::GraphInfo( + const std::string& graph_name, VertexInfoVector vertex_infos, + EdgeInfoVector edge_infos, const std::string& prefix, + std::shared_ptr version, + const std::unordered_map& extra_info) : impl_(new Impl(graph_name, std::move(vertex_infos), std::move(edge_infos), - prefix, version)) {} + prefix, version, extra_info)) {} const std::string& GraphInfo::GetName() const { return impl_->name_; } @@ -1054,6 +1067,11 @@ const std::shared_ptr& GraphInfo::version() const { return impl_->version_; } +const std::unordered_map& GraphInfo::GetExtraInfo() + const { + return impl_->extra_info_; +} + std::shared_ptr GraphInfo::GetVertexInfo( const std::string& type) const { int i = GetVertexInfoIndex(type); @@ -1142,9 +1160,10 @@ Result> GraphInfo::AddEdge( std::shared_ptr CreateGraphInfo( const std::string& name, const VertexInfoVector& vertex_infos, const EdgeInfoVector& edge_infos, const std::string& prefix, - std::shared_ptr version) { + std::shared_ptr version, + const std::unordered_map& extra_info) { return std::make_shared(name, vertex_infos, edge_infos, prefix, - version); + version, extra_info); } Result> GraphInfo::Load(const std::string& path) { @@ -1197,6 +1216,16 @@ Result GraphInfo::Dump() const { if (impl_->version_ != nullptr) { node["version"] = impl_->version_->ToString(); } + if (impl_->extra_info_.size() > 0) { + node["extra_info"]; + for (const auto& pair : impl_->extra_info_) { + ::Yaml::Node extra_info_node; + extra_info_node["key"] = pair.first; + extra_info_node["value"] = pair.second; + node["extra_info"].PushBack(); + node["extra_info"][node["extra_info"].Size() - 1] = extra_info_node; + } + } std::string dump_string; ::Yaml::Serialize(node, dump_string); return dump_string; diff --git a/cpp/test/test_info.cc b/cpp/test/test_info.cc index 252bfb345..d3d0ffec5 100644 --- a/cpp/test/test_info.cc +++ b/cpp/test/test_info.cc @@ -483,18 +483,36 @@ TEST_CASE("GraphInfo") { FileType::CSV, "p0_p1/"); auto vertex_info = CreateVertexInfo("test_vertex", 100, {pg}, "test_vertex/", version); + std::unordered_map extra_info = { + {"category", "test graph"}}; auto edge_info = CreateEdgeInfo("person", "knows", "person", 1024, 100, 100, true, {CreateAdjacentList(AdjListType::ordered_by_source, FileType::CSV, "adj_list/")}, {pg}, "test_edge/", version); - auto graph_info = - CreateGraphInfo(name, {vertex_info}, {edge_info}, "test_graph/", version); + auto graph_info = CreateGraphInfo(name, {vertex_info}, {edge_info}, + "test_graph/", version, extra_info); SECTION("Basics") { REQUIRE(graph_info->GetName() == name); REQUIRE(graph_info->GetPrefix() == "test_graph/"); REQUIRE(graph_info->version()->ToString() == "gar/v1"); + REQUIRE(graph_info->GetExtraInfo().size() == 1); + REQUIRE(graph_info->GetExtraInfo().find("category") != + graph_info->GetExtraInfo().end()); + REQUIRE(graph_info->GetExtraInfo().at("category") == "test graph"); + } + + SECTION("ExtraInfo") { + auto graph_info_with_extra_info = + CreateGraphInfo(name, {vertex_info}, {edge_info}, "test_graph/", + version, {{"key1", "value1"}, {"key2", "value2"}}); + const auto& extra_info = graph_info_with_extra_info->GetExtraInfo(); + REQUIRE(extra_info.size() == 2); + REQUIRE(extra_info.find("key1") != extra_info.end()); + REQUIRE(extra_info.at("key1") == "value1"); + REQUIRE(extra_info.find("key2") != extra_info.end()); + REQUIRE(extra_info.at("key2") == "value2"); } SECTION("VertexInfo") { @@ -552,6 +570,9 @@ TEST_CASE("GraphInfo") { REQUIRE(dump_result.status().ok()); std::string expected = R"(edges: - person_knows_person.edge.yaml +extra_info: + - key: category + value: test graph name: test_graph prefix: test_graph/ version: gar/v1 @@ -662,6 +683,9 @@ version: gar/v1 std::string graph_info_yaml = R"(name: ldbc_sample prefix: /tmp/ldbc/ version: gar/v1 +extra_info: + - key: category + value: test graph )"; SECTION("VertexInfo::Load") { @@ -690,6 +714,11 @@ version: gar/v1 auto graph_info = maybe_graph_info.value(); REQUIRE(graph_info->GetName() == "ldbc_sample"); REQUIRE(graph_info->GetPrefix() == "/tmp/ldbc/"); + REQUIRE(graph_info->version()->ToString() == "gar/v1"); + const auto& extra_info = graph_info->GetExtraInfo(); + REQUIRE(extra_info.size() == 1); + REQUIRE(extra_info.find("category") != extra_info.end()); + REQUIRE(extra_info.at("category") == "test graph"); } } diff --git a/docs/file-format.rst b/docs/file-format.rst index 2b581ae78..db8f32a19 100644 --- a/docs/file-format.rst +++ b/docs/file-format.rst @@ -118,6 +118,7 @@ A graph information file which named ".graph.yml" describes the meta infor - the root directory path of the data files; - the vertex information and edge information files included; - the version of GraphAr. +- extra information for the graph, could be used for user defined information. A vertex information file which named "