Skip to content

Commit

Permalink
[Feat][Format][C++] Support extra info in graph info (#356)
Browse files Browse the repository at this point in the history
* [Feat][Format][C++] Support extra info in graph info
  • Loading branch information
acezen authored Feb 4, 2024
1 parent 4d40e3a commit 4d092b3
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 19 deletions.
4 changes: 3 additions & 1 deletion cpp/include/gar/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

#include "gar/external/result.hpp"
Expand Down Expand Up @@ -115,7 +116,8 @@ std::shared_ptr<EdgeInfo> CreateEdgeInfo(
std::shared_ptr<GraphInfo> CreateGraphInfo(
const std::string& name, const VertexInfoVector& vertex_infos,
const EdgeInfoVector& edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version = nullptr);
std::shared_ptr<const InfoVersion> version = nullptr,
const std::unordered_map<std::string, std::string>& extra_info = {});

const std::shared_ptr<DataType>& boolean();
const std::shared_ptr<DataType>& int32();
Expand Down
18 changes: 14 additions & 4 deletions cpp/include/gar/graph_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "gar/fwd.h"
Expand Down Expand Up @@ -673,11 +674,13 @@ class GraphInfo {
* @param prefix The absolute path prefix to store chunk files of the graph.
* Defaults to "./".
* @param version The version of the graph info.
* @param extra_info The extra metadata of the graph info.
*/
explicit GraphInfo(const std::string& graph_name,
VertexInfoVector vertex_infos, EdgeInfoVector edge_infos,
const std::string& prefix = "./",
std::shared_ptr<const InfoVersion> version = nullptr);
explicit GraphInfo(
const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix = "./",
std::shared_ptr<const InfoVersion> version = nullptr,
const std::unordered_map<std::string, std::string>& extra_info = {});

/**
* @brief Loads the input file as a `GraphInfo` instance.
Expand Down Expand Up @@ -738,6 +741,13 @@ class GraphInfo {
*/
const std::shared_ptr<const InfoVersion>& version() const;

/**
* @brief Get the extra metadata of the graph info object.
*
* The extra metadata is the string-to-string map that was passed as
* `extra_info` when the graph info was constructed. The map is returned by
* const reference, so callers can inspect it without copying.
*
* @return A const reference to the extra metadata (key/value pairs) of the
* graph info object.
*/
const std::unordered_map<std::string, std::string>& GetExtraInfo() const;

/**
* @brief Get the vertex info with the given label.
* @param label The label of the vertex.
Expand Down
51 changes: 40 additions & 11 deletions cpp/src/graph_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
* limitations under the License.
*/

#include <iostream>
#include <unordered_set>

#include "yaml/Yaml.hpp"
Expand Down Expand Up @@ -956,6 +955,16 @@ static Result<std::shared_ptr<GraphInfo>> ConstructGraphInfo(
version, InfoVersion::Parse(
graph_meta->operator[]("version").As<std::string>()));
}
std::unordered_map<std::string, std::string> extra_info;
if (!graph_meta->operator[]("extra_info").IsNone()) {
auto& extra_info_node = graph_meta->operator[]("extra_info");
for (auto it = extra_info_node.Begin(); it != extra_info_node.End(); it++) {
auto node = (*it).second;
auto key = node["key"].As<std::string>();
auto value = node["value"].As<std::string>();
extra_info.emplace(key, value);
}
}

VertexInfoVector vertex_infos;
EdgeInfoVector edge_infos;
Expand Down Expand Up @@ -983,7 +992,7 @@ static Result<std::shared_ptr<GraphInfo>> ConstructGraphInfo(
}
}
return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
version);
version, extra_info);
}

} // namespace
Expand All @@ -992,12 +1001,14 @@ class GraphInfo::Impl {
public:
Impl(const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version)
std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info)
: name_(graph_name),
vertex_infos_(std::move(vertex_infos)),
edge_infos_(std::move(edge_infos)),
prefix_(prefix),
version_(std::move(version)) {
version_(std::move(version)),
extra_info_(extra_info) {
for (int i = 0; i < vertex_infos_.size(); i++) {
vlabel_to_index_[vertex_infos_[i]->GetLabel()] = i;
}
Expand Down Expand Up @@ -1035,16 +1046,18 @@ class GraphInfo::Impl {
EdgeInfoVector edge_infos_;
std::string prefix_;
std::shared_ptr<const InfoVersion> version_;
std::unordered_map<std::string, std::string> extra_info_;
std::unordered_map<std::string, int> vlabel_to_index_;
std::unordered_map<std::string, int> elabel_to_index_;
};

GraphInfo::GraphInfo(const std::string& graph_name,
VertexInfoVector vertex_infos, EdgeInfoVector edge_infos,
const std::string& prefix,
std::shared_ptr<const InfoVersion> version)
GraphInfo::GraphInfo(
const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info)
: impl_(new Impl(graph_name, std::move(vertex_infos), std::move(edge_infos),
prefix, version)) {}
prefix, version, extra_info)) {}

const std::string& GraphInfo::GetName() const { return impl_->name_; }

Expand All @@ -1054,6 +1067,11 @@ const std::shared_ptr<const InfoVersion>& GraphInfo::version() const {
return impl_->version_;
}

// Accessor for the user-supplied key/value metadata kept in the
// implementation object. The map is handed out by const reference; no copy
// is made.
const std::unordered_map<std::string, std::string>& GraphInfo::GetExtraInfo()
    const {
  const auto& extra_metadata = impl_->extra_info_;
  return extra_metadata;
}

std::shared_ptr<VertexInfo> GraphInfo::GetVertexInfo(
const std::string& type) const {
int i = GetVertexInfoIndex(type);
Expand Down Expand Up @@ -1142,9 +1160,10 @@ Result<std::shared_ptr<GraphInfo>> GraphInfo::AddEdge(
std::shared_ptr<GraphInfo> CreateGraphInfo(
const std::string& name, const VertexInfoVector& vertex_infos,
const EdgeInfoVector& edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version) {
std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info) {
return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
version);
version, extra_info);
}

Result<std::shared_ptr<GraphInfo>> GraphInfo::Load(const std::string& path) {
Expand Down Expand Up @@ -1197,6 +1216,16 @@ Result<std::string> GraphInfo::Dump() const {
if (impl_->version_ != nullptr) {
node["version"] = impl_->version_->ToString();
}
if (impl_->extra_info_.size() > 0) {
node["extra_info"];
for (const auto& pair : impl_->extra_info_) {
::Yaml::Node extra_info_node;
extra_info_node["key"] = pair.first;
extra_info_node["value"] = pair.second;
node["extra_info"].PushBack();
node["extra_info"][node["extra_info"].Size() - 1] = extra_info_node;
}
}
std::string dump_string;
::Yaml::Serialize(node, dump_string);
return dump_string;
Expand Down
33 changes: 31 additions & 2 deletions cpp/test/test_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -483,18 +483,36 @@ TEST_CASE("GraphInfo") {
FileType::CSV, "p0_p1/");
auto vertex_info =
CreateVertexInfo("test_vertex", 100, {pg}, "test_vertex/", version);
std::unordered_map<std::string, std::string> extra_info = {
{"category", "test graph"}};
auto edge_info =
CreateEdgeInfo("person", "knows", "person", 1024, 100, 100, true,
{CreateAdjacentList(AdjListType::ordered_by_source,
FileType::CSV, "adj_list/")},
{pg}, "test_edge/", version);
auto graph_info =
CreateGraphInfo(name, {vertex_info}, {edge_info}, "test_graph/", version);
auto graph_info = CreateGraphInfo(name, {vertex_info}, {edge_info},
"test_graph/", version, extra_info);

SECTION("Basics") {
REQUIRE(graph_info->GetName() == name);
REQUIRE(graph_info->GetPrefix() == "test_graph/");
REQUIRE(graph_info->version()->ToString() == "gar/v1");
REQUIRE(graph_info->GetExtraInfo().size() == 1);
REQUIRE(graph_info->GetExtraInfo().find("category") !=
graph_info->GetExtraInfo().end());
REQUIRE(graph_info->GetExtraInfo().at("category") == "test graph");
}

SECTION("ExtraInfo") {
auto graph_info_with_extra_info =
CreateGraphInfo(name, {vertex_info}, {edge_info}, "test_graph/",
version, {{"key1", "value1"}, {"key2", "value2"}});
const auto& extra_info = graph_info_with_extra_info->GetExtraInfo();
REQUIRE(extra_info.size() == 2);
REQUIRE(extra_info.find("key1") != extra_info.end());
REQUIRE(extra_info.at("key1") == "value1");
REQUIRE(extra_info.find("key2") != extra_info.end());
REQUIRE(extra_info.at("key2") == "value2");
}

SECTION("VertexInfo") {
Expand Down Expand Up @@ -552,6 +570,9 @@ TEST_CASE("GraphInfo") {
REQUIRE(dump_result.status().ok());
std::string expected = R"(edges:
- person_knows_person.edge.yaml
extra_info:
- key: category
value: test graph
name: test_graph
prefix: test_graph/
version: gar/v1
Expand Down Expand Up @@ -662,6 +683,9 @@ version: gar/v1
std::string graph_info_yaml = R"(name: ldbc_sample
prefix: /tmp/ldbc/
version: gar/v1
extra_info:
- key: category
value: test graph
)";

SECTION("VertexInfo::Load") {
Expand Down Expand Up @@ -690,6 +714,11 @@ version: gar/v1
auto graph_info = maybe_graph_info.value();
REQUIRE(graph_info->GetName() == "ldbc_sample");
REQUIRE(graph_info->GetPrefix() == "/tmp/ldbc/");
REQUIRE(graph_info->version()->ToString() == "gar/v1");
const auto& extra_info = graph_info->GetExtraInfo();
REQUIRE(extra_info.size() == 1);
REQUIRE(extra_info.find("category") != extra_info.end());
REQUIRE(extra_info.at("category") == "test graph");
}
}

Expand Down
3 changes: 2 additions & 1 deletion docs/file-format.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ A graph information file which named "<name>.graph.yml" describes the meta infor
- the root directory path of the data files;
- the vertex information and edge information files included;
- the version of GraphAr;
- extra information for the graph, which can be used to store user-defined metadata.

A vertex information file, named "<label>.vertex.yml", defines a single group of vertices with the same vertex label <label>, and all vertices in this group have the same schema. The file defines:

Expand Down Expand Up @@ -164,7 +165,7 @@ GraphAr provides a set of built-in data types that are common in real use cases
- float
- double
- string
- list (of int32, int64, float, double; not supported by CSV)
- list (of int32, int64, float, double, string; not supported by CSV)

.. tip::

Expand Down

0 comments on commit 4d092b3

Please sign in to comment.