Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feat][Format][C++] Support extra info in graph info #356

Merged
merged 5 commits into from
Feb 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cpp/include/gar/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

#include "gar/external/result.hpp"
Expand Down Expand Up @@ -115,7 +116,8 @@ std::shared_ptr<EdgeInfo> CreateEdgeInfo(
std::shared_ptr<GraphInfo> CreateGraphInfo(
const std::string& name, const VertexInfoVector& vertex_infos,
const EdgeInfoVector& edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version = nullptr);
std::shared_ptr<const InfoVersion> version = nullptr,
const std::unordered_map<std::string, std::string>& extra_info = {});

const std::shared_ptr<DataType>& boolean();
const std::shared_ptr<DataType>& int32();
Expand Down
18 changes: 14 additions & 4 deletions cpp/include/gar/graph_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "gar/fwd.h"
Expand Down Expand Up @@ -673,11 +674,13 @@ class GraphInfo {
* @param prefix The absolute path prefix to store chunk files of the graph.
* Defaults to "./".
* @param version The version of the graph info.
* @param extra_info The extra metadata of the graph info.
*/
explicit GraphInfo(const std::string& graph_name,
VertexInfoVector vertex_infos, EdgeInfoVector edge_infos,
const std::string& prefix = "./",
std::shared_ptr<const InfoVersion> version = nullptr);
explicit GraphInfo(
const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix = "./",
std::shared_ptr<const InfoVersion> version = nullptr,
const std::unordered_map<std::string, std::string>& extra_info = {});

/**
* @brief Loads the input file as a `GraphInfo` instance.
Expand Down Expand Up @@ -738,6 +741,13 @@ class GraphInfo {
*/
const std::shared_ptr<const InfoVersion>& version() const;

/**
* @brief Get the extra metadata of the graph info object.
*
* @return The extra metadata of the graph info object.
*/
const std::unordered_map<std::string, std::string>& GetExtraInfo() const;

/**
* @brief Get the vertex info with the given label.
* @param label The label of the vertex.
Expand Down
51 changes: 40 additions & 11 deletions cpp/src/graph_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
* limitations under the License.
*/

#include <iostream>
#include <unordered_set>

#include "yaml/Yaml.hpp"
Expand Down Expand Up @@ -956,6 +955,16 @@ static Result<std::shared_ptr<GraphInfo>> ConstructGraphInfo(
version, InfoVersion::Parse(
graph_meta->operator[]("version").As<std::string>()));
}
std::unordered_map<std::string, std::string> extra_info;
if (!graph_meta->operator[]("extra_info").IsNone()) {
auto& extra_info_node = graph_meta->operator[]("extra_info");
for (auto it = extra_info_node.Begin(); it != extra_info_node.End(); it++) {
auto node = (*it).second;
auto key = node["key"].As<std::string>();
auto value = node["value"].As<std::string>();
extra_info.emplace(key, value);
}
}

VertexInfoVector vertex_infos;
EdgeInfoVector edge_infos;
Expand Down Expand Up @@ -983,7 +992,7 @@ static Result<std::shared_ptr<GraphInfo>> ConstructGraphInfo(
}
}
return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
version);
version, extra_info);
}

} // namespace
Expand All @@ -992,12 +1001,14 @@ class GraphInfo::Impl {
public:
Impl(const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version)
std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info)
: name_(graph_name),
vertex_infos_(std::move(vertex_infos)),
edge_infos_(std::move(edge_infos)),
prefix_(prefix),
version_(std::move(version)) {
version_(std::move(version)),
extra_info_(extra_info) {
for (int i = 0; i < vertex_infos_.size(); i++) {
vlabel_to_index_[vertex_infos_[i]->GetLabel()] = i;
}
Expand Down Expand Up @@ -1035,16 +1046,18 @@ class GraphInfo::Impl {
EdgeInfoVector edge_infos_;
std::string prefix_;
std::shared_ptr<const InfoVersion> version_;
std::unordered_map<std::string, std::string> extra_info_;
std::unordered_map<std::string, int> vlabel_to_index_;
std::unordered_map<std::string, int> elabel_to_index_;
};

GraphInfo::GraphInfo(const std::string& graph_name,
VertexInfoVector vertex_infos, EdgeInfoVector edge_infos,
const std::string& prefix,
std::shared_ptr<const InfoVersion> version)
GraphInfo::GraphInfo(
const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info)
: impl_(new Impl(graph_name, std::move(vertex_infos), std::move(edge_infos),
prefix, version)) {}
prefix, version, extra_info)) {}

const std::string& GraphInfo::GetName() const { return impl_->name_; }

Expand All @@ -1054,6 +1067,11 @@ const std::shared_ptr<const InfoVersion>& GraphInfo::version() const {
return impl_->version_;
}

/**
 * Accessor for the extra metadata attached to this graph info.
 *
 * @return A const reference to the string-to-string map held by the
 * implementation object (`impl_->extra_info_`); the map is not copied.
 */
const std::unordered_map<std::string, std::string>& GraphInfo::GetExtraInfo()
const {
return impl_->extra_info_;
}

std::shared_ptr<VertexInfo> GraphInfo::GetVertexInfo(
const std::string& type) const {
int i = GetVertexInfoIndex(type);
Expand Down Expand Up @@ -1142,9 +1160,10 @@ Result<std::shared_ptr<GraphInfo>> GraphInfo::AddEdge(
std::shared_ptr<GraphInfo> CreateGraphInfo(
const std::string& name, const VertexInfoVector& vertex_infos,
const EdgeInfoVector& edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version) {
std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info) {
return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
version);
version, extra_info);
}

Result<std::shared_ptr<GraphInfo>> GraphInfo::Load(const std::string& path) {
Expand Down Expand Up @@ -1197,6 +1216,16 @@ Result<std::string> GraphInfo::Dump() const {
if (impl_->version_ != nullptr) {
node["version"] = impl_->version_->ToString();
}
if (impl_->extra_info_.size() > 0) {
node["extra_info"];
for (const auto& pair : impl_->extra_info_) {
::Yaml::Node extra_info_node;
extra_info_node["key"] = pair.first;
extra_info_node["value"] = pair.second;
node["extra_info"].PushBack();
node["extra_info"][node["extra_info"].Size() - 1] = extra_info_node;
}
}
std::string dump_string;
::Yaml::Serialize(node, dump_string);
return dump_string;
Expand Down
33 changes: 31 additions & 2 deletions cpp/test/test_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -483,18 +483,36 @@ TEST_CASE("GraphInfo") {
FileType::CSV, "p0_p1/");
auto vertex_info =
CreateVertexInfo("test_vertex", 100, {pg}, "test_vertex/", version);
std::unordered_map<std::string, std::string> extra_info = {
{"category", "test graph"}};
auto edge_info =
CreateEdgeInfo("person", "knows", "person", 1024, 100, 100, true,
{CreateAdjacentList(AdjListType::ordered_by_source,
FileType::CSV, "adj_list/")},
{pg}, "test_edge/", version);
auto graph_info =
CreateGraphInfo(name, {vertex_info}, {edge_info}, "test_graph/", version);
auto graph_info = CreateGraphInfo(name, {vertex_info}, {edge_info},
"test_graph/", version, extra_info);

SECTION("Basics") {
REQUIRE(graph_info->GetName() == name);
REQUIRE(graph_info->GetPrefix() == "test_graph/");
REQUIRE(graph_info->version()->ToString() == "gar/v1");
REQUIRE(graph_info->GetExtraInfo().size() == 1);
REQUIRE(graph_info->GetExtraInfo().find("category") !=
graph_info->GetExtraInfo().end());
REQUIRE(graph_info->GetExtraInfo().at("category") == "test graph");
}

SECTION("ExtraInfo") {
auto graph_info_with_extra_info =
CreateGraphInfo(name, {vertex_info}, {edge_info}, "test_graph/",
version, {{"key1", "value1"}, {"key2", "value2"}});
const auto& extra_info = graph_info_with_extra_info->GetExtraInfo();
REQUIRE(extra_info.size() == 2);
REQUIRE(extra_info.find("key1") != extra_info.end());
REQUIRE(extra_info.at("key1") == "value1");
REQUIRE(extra_info.find("key2") != extra_info.end());
REQUIRE(extra_info.at("key2") == "value2");
}

SECTION("VertexInfo") {
Expand Down Expand Up @@ -552,6 +570,9 @@ TEST_CASE("GraphInfo") {
REQUIRE(dump_result.status().ok());
std::string expected = R"(edges:
- person_knows_person.edge.yaml
extra_info:
- key: category
value: test graph
name: test_graph
prefix: test_graph/
version: gar/v1
Expand Down Expand Up @@ -662,6 +683,9 @@ version: gar/v1
std::string graph_info_yaml = R"(name: ldbc_sample
prefix: /tmp/ldbc/
version: gar/v1
extra_info:
- key: category
value: test graph
)";

SECTION("VertexInfo::Load") {
Expand Down Expand Up @@ -690,6 +714,11 @@ version: gar/v1
auto graph_info = maybe_graph_info.value();
REQUIRE(graph_info->GetName() == "ldbc_sample");
REQUIRE(graph_info->GetPrefix() == "/tmp/ldbc/");
REQUIRE(graph_info->version()->ToString() == "gar/v1");
const auto& extra_info = graph_info->GetExtraInfo();
REQUIRE(extra_info.size() == 1);
REQUIRE(extra_info.find("category") != extra_info.end());
REQUIRE(extra_info.at("category") == "test graph");
}
}

Expand Down
3 changes: 2 additions & 1 deletion docs/file-format.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ A graph information file which named "<name>.graph.yml" describes the meta infor
- the root directory path of the data files;
- the vertex information and edge information files included;
- the version of GraphAr;
- extra information for the graph, which can be used to store user-defined metadata.

A vertex information file, which is named "<label>.vertex.yml", defines a single group of vertices with the same vertex label <label>, and all vertices in this group have the same schema. The file defines:

Expand Down Expand Up @@ -164,7 +165,7 @@ GraphAr provides a set of built-in data types that are common in real use cases
- float
- double
- string
- list (of int32, int64, float, double; not supported by CSV)
- list (of int32, int64, float, double, string; not supported by CSV)

.. tip::

Expand Down
Loading