Skip to content

Commit

Permalink
Add InfoVersion to store version information of info and support data…
Browse files Browse the repository at this point in the history
… type extension base on info version (#27)
  • Loading branch information
acezen authored Dec 13, 2022
1 parent 2045c5d commit 18e3f61
Show file tree
Hide file tree
Showing 27 changed files with 561 additions and 195 deletions.
7 changes: 7 additions & 0 deletions docs/api-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,10 @@ Yaml Parser
.. doxygenclass:: GraphArchive::Yaml
:members:
:undoc-members:

Info Version
~~~~~~~~~~~~~~~~~~~

.. doxygenclass:: GraphArchive::InfoVersion
:members:
:undoc-members:
54 changes: 41 additions & 13 deletions include/gar/graph_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ limitations under the License.
#include "utils/result.h"
#include "utils/status.h"
#include "utils/utils.h"
#include "utils/version_parser.h"
#include "utils/yaml.h"

namespace GAR_NAMESPACE_INTERNAL {
Expand All @@ -35,13 +36,13 @@ class Yaml;

/// Property is a struct to store the property information.
struct Property {
std::string name; // property name
DataType::type type; // property data type
bool is_primary; // primary key tag
std::string name; // property name
DataType type; // property data type
bool is_primary; // primary key tag
};

static bool operator==(const Property& lhs, const Property& rhs) {
return (lhs.name == rhs.name) && (lhs.type == rhs.type) &&
return (lhs.name == rhs.name) && (lhs.type.Equals(rhs.type)) &&
(lhs.is_primary == rhs.is_primary);
}

Expand Down Expand Up @@ -119,11 +120,16 @@ class VertexInfo {
*
* @param label The label of the vertex.
* @param chunk_size number of vertex in each vertex chunk.
* @param version version of the vertex info.
* @param prefix prefix of the vertex info.
*/
explicit VertexInfo(const std::string& label, IdType chunk_size,
const InfoVersion& version,
const std::string& prefix = "")
: label_(label), chunk_size_(chunk_size), prefix_(prefix) {
: label_(label),
chunk_size_(chunk_size),
version_(version),
prefix_(prefix) {
if (prefix_.empty()) {
prefix_ = label_ + "/"; // default prefix
}
Expand Down Expand Up @@ -157,6 +163,10 @@ class VertexInfo {

property_groups_.push_back(property_group);
for (const auto& p : property_group.GetProperties()) {
if (!version_.CheckType(p.type.ToTypeName())) {
return Status::Invalid(
"The property type is not supported by the version.");
}
p2type_[p.name] = p.type;
p2primary_[p.name] = p.is_primary;
p2group_index_[p.name] = property_groups_.size() - 1;
Expand All @@ -173,6 +183,9 @@ class VertexInfo {
/// Get the path prefix of the vertex.
inline std::string GetPrefix() const { return prefix_; }

/// Get the version info of the vertex.
inline const InfoVersion& GetVersion() const { return version_; }

/// Get the property groups of the vertex.
inline const std::vector<PropertyGroup>& GetPropertyGroups() const {
return property_groups_;
Expand All @@ -188,7 +201,7 @@ class VertexInfo {
}

/// Get the data type of property
inline Result<DataType::type> GetPropertyType(
inline Result<DataType> GetPropertyType(
const std::string& property_name) const noexcept {
if (p2type_.find(property_name) == p2type_.end()) {
return Status::KeyError("The property is not found.");
Expand Down Expand Up @@ -285,9 +298,10 @@ class VertexInfo {
private:
std::string label_;
IdType chunk_size_;
InfoVersion version_;
std::string prefix_;
std::vector<PropertyGroup> property_groups_;
std::map<std::string, DataType::type> p2type_;
std::map<std::string, DataType> p2type_;
std::map<std::string, bool> p2primary_;
std::map<std::string, size_t> p2group_index_;
};
Expand All @@ -310,19 +324,21 @@ class EdgeInfo {
* @param src_chunk_size number of source vertices in each vertex chunk
* @param dst_chunk_size number of destination vertices in each vertex chunk
* @param directed whether the edge is directed
* @param version version of the edge info
* @param prefix prefix of the edge info
*/
explicit EdgeInfo(const std::string& src_label, const std::string& edge_label,
const std::string& dst_label, IdType chunk_size,
IdType src_chunk_size, IdType dst_chunk_size, bool directed,
const std::string& prefix = "")
const InfoVersion& version, const std::string& prefix = "")
: src_label_(src_label),
edge_label_(edge_label),
dst_label_(dst_label),
chunk_size_(chunk_size),
src_chunk_size_(src_chunk_size),
dst_chunk_size_(dst_chunk_size),
directed_(directed),
version_(version),
prefix_(prefix) {
if (prefix_.empty()) {
prefix_ = src_label_ + REGULAR_SEPERATOR + edge_label_ +
Expand Down Expand Up @@ -391,6 +407,10 @@ class EdgeInfo {
}
adj_list2property_groups_[adj_list_type].push_back(property_group);
for (auto& p : property_group.GetProperties()) {
if (!version_.CheckType(p.type.ToTypeName())) {
return Status::Invalid(
"The property type is not supported by the version.");
}
p2type_[p.name] = p.type;
p2primary_[p.name] = p.is_primary;
p2group_index_[p.name][adj_list_type] =
Expand Down Expand Up @@ -423,6 +443,9 @@ class EdgeInfo {
/// Check if edge is directed.
inline bool IsDirected() const noexcept { return directed_; }

/// Get the version info of the edge.
inline const InfoVersion& GetVersion() const { return version_; }

/// Get path prefix of adj list type.
inline Result<std::string> GetAdjListPrefix(AdjListType adj_list_type) const {
if (!ContainAdjList(adj_list_type)) {
Expand Down Expand Up @@ -587,8 +610,7 @@ class EdgeInfo {
}

/// Get the data type of property
Result<DataType::type> GetPropertyType(const std::string& property) const
noexcept {
Result<DataType> GetPropertyType(const std::string& property) const noexcept {
if (p2type_.find(property) == p2type_.end()) {
return Status::KeyError("The property is not found.");
}
Expand Down Expand Up @@ -687,8 +709,9 @@ class EdgeInfo {
std::string dst_label_;
IdType chunk_size_, src_chunk_size_, dst_chunk_size_;
bool directed_;
InfoVersion version_;
std::string prefix_;
std::map<std::string, DataType::type> p2type_;
std::map<std::string, DataType> p2type_;
std::map<std::string, bool> p2primary_;
std::map<std::string, std::map<AdjListType, size_t>> p2group_index_;
std::map<AdjListType, std::string> adj_list2prefix_;
Expand All @@ -704,11 +727,12 @@ class GraphInfo {
* the prefix of graph would be ./ by default.
*
* @param[in] graph_name name of graph
* @param[in] version version of graph info
* @param[in] prefix absolute path prefix to store chunk files of graph.
*/
explicit GraphInfo(const std::string& graph_name,
explicit GraphInfo(const std::string& graph_name, const InfoVersion& version,
const std::string& prefix = "./")
: name_(graph_name), prefix_(prefix) {}
: name_(graph_name), version_(version), prefix_(prefix) {}

/**
* @brief Loads the input file as a GraphInfo instance.
Expand Down Expand Up @@ -772,6 +796,9 @@ class GraphInfo {
/// Get the absolute path prefix of chunk files.
inline std::string GetPrefix() const noexcept { return prefix_; }

/// Get the version info of the graph.
inline const InfoVersion& GetVersion() const { return version_; }

/// Get the vertex info by vertex label
inline Result<const VertexInfo&> GetVertexInfo(const std::string& label) const
noexcept {
Expand Down Expand Up @@ -873,6 +900,7 @@ class GraphInfo {

private:
std::string name_;
InfoVersion version_;
std::string prefix_;
std::map<std::string, VertexInfo> vertex2info_; // label -> info
std::map<std::string, EdgeInfo>
Expand Down
26 changes: 11 additions & 15 deletions include/gar/utils/convert_to_arrow_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ limitations under the License.
namespace GAR_NAMESPACE_INTERNAL {

/// \brief Struct to convert DataType to arrow::DataType.
template <DataType::type T>
template <Type T>
struct ConvertToArrowType {};

#define CONVERT_TO_ARROW_TYPE(type, c_type, arrow_type, array_type, \
Expand All @@ -42,22 +42,18 @@ struct ConvertToArrowType {};
static const char* type_to_string() { return str; } \
};

CONVERT_TO_ARROW_TYPE(DataType::type::BOOL, bool, arrow::BooleanType,
arrow::BooleanArray, arrow::BooleanBuilder,
arrow::boolean(), "boolean")
CONVERT_TO_ARROW_TYPE(DataType::type::INT32, int32_t, arrow::Int32Type,
arrow::Int32Array, arrow::Int32Builder, arrow::int32(),
"int32")
CONVERT_TO_ARROW_TYPE(DataType::type::INT64, int64_t, arrow::Int64Type,
arrow::Int64Array, arrow::Int64Builder, arrow::int64(),
"int64")
CONVERT_TO_ARROW_TYPE(DataType::type::FLOAT, float, arrow::FloatType,
arrow::FloatArray, arrow::FloatBuilder, arrow::float32(),
"float")
CONVERT_TO_ARROW_TYPE(DataType::type::DOUBLE, double, arrow::DoubleType,
CONVERT_TO_ARROW_TYPE(Type::BOOL, bool, arrow::BooleanType, arrow::BooleanArray,
arrow::BooleanBuilder, arrow::boolean(), "boolean")
CONVERT_TO_ARROW_TYPE(Type::INT32, int32_t, arrow::Int32Type, arrow::Int32Array,
arrow::Int32Builder, arrow::int32(), "int32")
CONVERT_TO_ARROW_TYPE(Type::INT64, int64_t, arrow::Int64Type, arrow::Int64Array,
arrow::Int64Builder, arrow::int64(), "int64")
CONVERT_TO_ARROW_TYPE(Type::FLOAT, float, arrow::FloatType, arrow::FloatArray,
arrow::FloatBuilder, arrow::float32(), "float")
CONVERT_TO_ARROW_TYPE(Type::DOUBLE, double, arrow::DoubleType,
arrow::DoubleArray, arrow::DoubleBuilder,
arrow::float64(), "double")
CONVERT_TO_ARROW_TYPE(DataType::type::STRING, std::string, arrow::StringType,
CONVERT_TO_ARROW_TYPE(Type::STRING, std::string, arrow::StringType,
arrow::StringArray, arrow::StringBuilder, arrow::utf8(),
"string")

Expand Down
103 changes: 66 additions & 37 deletions include/gar/utils/data_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ limitations under the License.
#include <map>
#include <memory>
#include <string>
#include <utility>

#include "gar/utils/macros.h"

Expand All @@ -29,58 +30,86 @@ class DataType;

namespace GAR_NAMESPACE_INTERNAL {

/// \brief Enumeration of the data type categories.
enum class Type {
  BOOL = 0,          ///< Boolean as 1 bit, LSB bit-packed ordering
  INT32 = 1,         ///< Signed 32-bit little-endian integer
  INT64 = 2,         ///< Signed 64-bit little-endian integer
  FLOAT = 3,         ///< 4-byte floating point value
  DOUBLE = 4,        ///< 8-byte floating point value
  STRING = 5,        ///< UTF8 variable-length string as List<Char>
  USER_DEFINED = 6,  ///< User-defined data type
  MAX_ID = 7,        ///< Must stay the last enumerator
};

/// \brief The DataType class pairs a Type id with an optional user-defined type
/// name and provides functions to parse data types.
struct DataType {
/// \brief Main data type enumeration
enum type {
/// Boolean as 1 bit, LSB bit-packed ordering
BOOL = 0,
class DataType {
public:
DataType() : id_(Type::BOOL) {}

/// Signed 32-bit little-endian integer
INT32 = 1,
explicit DataType(Type id, const std::string& user_defined_type_name = "")
: id_(id), user_defined_type_name_(user_defined_type_name) {}

/// Signed 64-bit little-endian integer
INT64 = 2,
DataType(const DataType& other)
: id_(other.id_),
user_defined_type_name_(other.user_defined_type_name_) {}

/// 4-byte floating point value
FLOAT = 3,
explicit DataType(DataType&& other)
: id_(other.id_),
user_defined_type_name_(std::move(other.user_defined_type_name_)) {}

/// 8-byte floating point value
DOUBLE = 4,
inline DataType& operator=(const DataType& other) = default;

/// UTF8 variable-length string as List<Char>
STRING = 5,
bool Equals(const DataType& other) const {
return id_ == other.id_ &&
user_defined_type_name_ == other.user_defined_type_name_;
}

// Leave this at the end
MAX_ID = 6,
};
bool operator==(const DataType& other) const { return Equals(other); }

static std::shared_ptr<arrow::DataType> DataTypeToArrowDataType(
DataType::type type_id);
DataType type_id);

static DataType::type ArrowDataTypeToDataType(
static DataType ArrowDataTypeToDataType(
std::shared_ptr<arrow::DataType> type);

static DataType::type StringToDataType(const std::string& str) {
static const std::map<std::string, DataType::type> str2type{
{"bool", DataType::type::BOOL}, {"int32", DataType::type::INT32},
{"int64", DataType::type::INT64}, {"float", DataType::type::FLOAT},
{"double", DataType::type::DOUBLE}, {"string", DataType::type::STRING}};
try {
return str2type.at(str.c_str());
} catch (const std::exception& e) {
throw std::runtime_error("KeyError: " + str);
/// \brief Parse a type name into a DataType.
///
/// The built-in names "bool", "int32", "int64", "float", "double" and
/// "string" map to their corresponding Type id. Any other name yields a
/// DataType with id Type::USER_DEFINED that carries \p str as its
/// user-defined type name.
///
/// \param str the type name to parse.
/// \return the DataType matching the name.
static DataType TypeNameToDataType(const std::string& str) {
  static const std::map<std::string, Type> str2type{
      {"bool", Type::BOOL},     {"int32", Type::INT32},
      {"int64", Type::INT64},   {"float", Type::FLOAT},
      {"double", Type::DOUBLE}, {"string", Type::STRING}};

  // Single lookup: reuse the iterator instead of find() followed by at().
  const auto it = str2type.find(str);
  if (it == str2type.end()) {
    return DataType(Type::USER_DEFINED, str);
  }
  return DataType(it->second);
}
static const char* DataTypeToString(DataType::type type) {
static const std::map<DataType::type, const char*> type2str{
{DataType::type::BOOL, "bool"}, {DataType::type::INT32, "int32"},
{DataType::type::INT64, "int64"}, {DataType::type::FLOAT, "float"},
{DataType::type::DOUBLE, "double"}, {DataType::type::STRING, "string"}};
return type2str.at(type);
}
}; // struct Type

/// \brief Return the type category of the DataType.
Type id() const { return id_; }

std::string ToTypeName() const;

private:
Type id_;
std::string user_defined_type_name_;
}; // struct DataType
} // namespace GAR_NAMESPACE_INTERNAL

#endif // GAR_UTILS_DATA_TYPE_H_
1 change: 0 additions & 1 deletion include/gar/utils/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ limitations under the License.
#include "gar/utils/result.h"

#define REGULAR_SEPERATOR "_"
#define GAR_VERSION 1

namespace arrow {
class Array;
Expand Down
Loading

0 comments on commit 18e3f61

Please sign in to comment.