Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add InfoVersion to store version information of info and support data type extension based on info version #27

Merged
merged 11 commits into from
Dec 13, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/api-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,10 @@ Yaml Parser
.. doxygenclass:: GraphArchive::Yaml
:members:
:undoc-members:

Info Version
acezen marked this conversation as resolved.
Show resolved Hide resolved
~~~~~~~~~~~~~~~~~~~

.. doxygenclass:: GraphArchive::InfoVersion
:members:
:undoc-members:
45 changes: 32 additions & 13 deletions include/gar/graph_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ limitations under the License.
#include "utils/result.h"
#include "utils/status.h"
#include "utils/utils.h"
#include "utils/version_parser.h"
#include "utils/yaml.h"

namespace GAR_NAMESPACE_INTERNAL {
Expand All @@ -35,13 +36,13 @@ class Yaml;

/// Property is a struct to store the property information.
struct Property {
std::string name; // property name
DataType::type type; // property data type
bool is_primary; // primary key tag
std::string name; // property name
DataType type; // property data type
bool is_primary; // primary key tag
};

static bool operator==(const Property& lhs, const Property& rhs) {
return (lhs.name == rhs.name) && (lhs.type == rhs.type) &&
return (lhs.name == rhs.name) && (lhs.type.Equals(rhs.type)) &&
(lhs.is_primary == rhs.is_primary);
}

Expand Down Expand Up @@ -119,11 +120,16 @@ class VertexInfo {
*
* @param label The label of the vertex.
* @param chunk_size number of vertices in each vertex chunk.
* @param version version of the vertex info.
* @param prefix prefix of the vertex info.
*/
explicit VertexInfo(const std::string& label, IdType chunk_size,
const InfoVersion& version,
const std::string& prefix = "")
: label_(label), chunk_size_(chunk_size), prefix_(prefix) {
: label_(label),
chunk_size_(chunk_size),
version_(version),
prefix_(prefix) {
if (prefix_.empty()) {
prefix_ = label_ + "/"; // default prefix
}
Expand Down Expand Up @@ -157,6 +163,10 @@ class VertexInfo {

property_groups_.push_back(property_group);
for (const auto& p : property_group.GetProperties()) {
if (!version_.CheckType(p.type.ToTypeName())) {
return Status::Invalid(
"The property type is not supported by the version.");
}
p2type_[p.name] = p.type;
p2primary_[p.name] = p.is_primary;
p2group_index_[p.name] = property_groups_.size() - 1;
Expand Down Expand Up @@ -188,7 +198,7 @@ class VertexInfo {
}

/// Get the data type of property
inline Result<DataType::type> GetPropertyType(
inline Result<DataType> GetPropertyType(
const std::string& property_name) const noexcept {
if (p2type_.find(property_name) == p2type_.end()) {
return Status::KeyError("The property is not found.");
Expand Down Expand Up @@ -285,9 +295,10 @@ class VertexInfo {
private:
std::string label_;
IdType chunk_size_;
InfoVersion version_;
std::string prefix_;
std::vector<PropertyGroup> property_groups_;
std::map<std::string, DataType::type> p2type_;
std::map<std::string, DataType> p2type_;
std::map<std::string, bool> p2primary_;
std::map<std::string, size_t> p2group_index_;
};
Expand All @@ -310,19 +321,21 @@ class EdgeInfo {
* @param src_chunk_size number of source vertices in each vertex chunk
* @param dst_chunk_size number of destination vertices in each vertex chunk
* @param directed whether the edge is directed
* @param version version of the edge info
* @param prefix prefix of the edge info
*/
explicit EdgeInfo(const std::string& src_label, const std::string& edge_label,
const std::string& dst_label, IdType chunk_size,
IdType src_chunk_size, IdType dst_chunk_size, bool directed,
const std::string& prefix = "")
const InfoVersion& version, const std::string& prefix = "")
: src_label_(src_label),
edge_label_(edge_label),
dst_label_(dst_label),
chunk_size_(chunk_size),
src_chunk_size_(src_chunk_size),
dst_chunk_size_(dst_chunk_size),
directed_(directed),
version_(version),
prefix_(prefix) {
if (prefix_.empty()) {
prefix_ = src_label_ + REGULAR_SEPERATOR + edge_label_ +
Expand Down Expand Up @@ -391,6 +404,10 @@ class EdgeInfo {
}
adj_list2property_groups_[adj_list_type].push_back(property_group);
for (auto& p : property_group.GetProperties()) {
if (!version_.CheckType(p.type.ToTypeName())) {
return Status::Invalid(
"The property type is not supported by the version.");
}
p2type_[p.name] = p.type;
p2primary_[p.name] = p.is_primary;
p2group_index_[p.name][adj_list_type] =
Expand Down Expand Up @@ -587,8 +604,7 @@ class EdgeInfo {
}

/// Get the data type of property
Result<DataType::type> GetPropertyType(const std::string& property) const
noexcept {
Result<DataType> GetPropertyType(const std::string& property) const noexcept {
if (p2type_.find(property) == p2type_.end()) {
return Status::KeyError("The property is not found.");
}
Expand Down Expand Up @@ -687,8 +703,9 @@ class EdgeInfo {
std::string dst_label_;
IdType chunk_size_, src_chunk_size_, dst_chunk_size_;
bool directed_;
InfoVersion version_;
std::string prefix_;
std::map<std::string, DataType::type> p2type_;
std::map<std::string, DataType> p2type_;
std::map<std::string, bool> p2primary_;
std::map<std::string, std::map<AdjListType, size_t>> p2group_index_;
std::map<AdjListType, std::string> adj_list2prefix_;
Expand All @@ -704,11 +721,12 @@ class GraphInfo {
* the prefix of graph would be ./ by default.
*
* @param[in] graph_name name of graph
* @param[in] version version of graph info
* @param[in] prefix absolute path prefix to store chunk files of graph.
*/
explicit GraphInfo(const std::string& graph_name,
explicit GraphInfo(const std::string& graph_name, const InfoVersion& version,
const std::string& prefix = "./")
: name_(graph_name), prefix_(prefix) {}
: name_(graph_name), version_(version), prefix_(prefix) {}

/**
* @brief Loads the input file as a GraphInfo instance.
Expand Down Expand Up @@ -873,6 +891,7 @@ class GraphInfo {

private:
std::string name_;
InfoVersion version_;
acezen marked this conversation as resolved.
Show resolved Hide resolved
std::string prefix_;
std::map<std::string, VertexInfo> vertex2info_; // label -> info
std::map<std::string, EdgeInfo>
Expand Down
26 changes: 11 additions & 15 deletions include/gar/utils/convert_to_arrow_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ limitations under the License.
namespace GAR_NAMESPACE_INTERNAL {

/// \brief Struct to convert DataType to arrow::DataType.
template <DataType::type T>
template <Type T>
struct ConvertToArrowType {};

#define CONVERT_TO_ARROW_TYPE(type, c_type, arrow_type, array_type, \
Expand All @@ -42,22 +42,18 @@ struct ConvertToArrowType {};
static const char* type_to_string() { return str; } \
};

CONVERT_TO_ARROW_TYPE(DataType::type::BOOL, bool, arrow::BooleanType,
arrow::BooleanArray, arrow::BooleanBuilder,
arrow::boolean(), "boolean")
CONVERT_TO_ARROW_TYPE(DataType::type::INT32, int32_t, arrow::Int32Type,
arrow::Int32Array, arrow::Int32Builder, arrow::int32(),
"int32")
CONVERT_TO_ARROW_TYPE(DataType::type::INT64, int64_t, arrow::Int64Type,
arrow::Int64Array, arrow::Int64Builder, arrow::int64(),
"int64")
CONVERT_TO_ARROW_TYPE(DataType::type::FLOAT, float, arrow::FloatType,
arrow::FloatArray, arrow::FloatBuilder, arrow::float32(),
"float")
CONVERT_TO_ARROW_TYPE(DataType::type::DOUBLE, double, arrow::DoubleType,
CONVERT_TO_ARROW_TYPE(Type::BOOL, bool, arrow::BooleanType, arrow::BooleanArray,
arrow::BooleanBuilder, arrow::boolean(), "boolean")
CONVERT_TO_ARROW_TYPE(Type::INT32, int32_t, arrow::Int32Type, arrow::Int32Array,
arrow::Int32Builder, arrow::int32(), "int32")
CONVERT_TO_ARROW_TYPE(Type::INT64, int64_t, arrow::Int64Type, arrow::Int64Array,
arrow::Int64Builder, arrow::int64(), "int64")
CONVERT_TO_ARROW_TYPE(Type::FLOAT, float, arrow::FloatType, arrow::FloatArray,
arrow::FloatBuilder, arrow::float32(), "float")
CONVERT_TO_ARROW_TYPE(Type::DOUBLE, double, arrow::DoubleType,
arrow::DoubleArray, arrow::DoubleBuilder,
arrow::float64(), "double")
CONVERT_TO_ARROW_TYPE(DataType::type::STRING, std::string, arrow::StringType,
CONVERT_TO_ARROW_TYPE(Type::STRING, std::string, arrow::StringType,
arrow::StringArray, arrow::StringBuilder, arrow::utf8(),
"string")

Expand Down
103 changes: 66 additions & 37 deletions include/gar/utils/data_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ limitations under the License.
#include <map>
#include <memory>
#include <string>
#include <utility>

#include "gar/utils/macros.h"

Expand All @@ -29,58 +30,86 @@ class DataType;

namespace GAR_NAMESPACE_INTERNAL {

/// \brief Enumeration of the main data type ids.
enum class Type {
  BOOL = 0,          ///< Boolean as 1 bit, LSB bit-packed ordering
  INT32 = 1,         ///< Signed 32-bit little-endian integer
  INT64 = 2,         ///< Signed 64-bit little-endian integer
  FLOAT = 3,         ///< 4-byte floating point value
  DOUBLE = 4,        ///< 8-byte floating point value
  STRING = 5,        ///< UTF8 variable-length string as List<Char>
  USER_DEFINED = 6,  ///< User-defined data type

  // Leave this at the end
  MAX_ID = 7,
};

/// \brief The DataType class pairs a Type id with an optional user-defined
/// type name and provides functions to parse and convert data types.
struct DataType {
/// \brief Main data type enumeration
enum type {
/// Boolean as 1 bit, LSB bit-packed ordering
BOOL = 0,
class DataType {
public:
DataType() : id_(Type::BOOL) {}

/// Signed 32-bit little-endian integer
INT32 = 1,
explicit DataType(Type id, const std::string& user_defined_type_name = "")
: id_(id), user_defined_type_name_(user_defined_type_name) {}

/// Signed 64-bit little-endian integer
INT64 = 2,
DataType(const DataType& other)
: id_(other.id_),
user_defined_type_name_(other.user_defined_type_name_) {}

/// 4-byte floating point value
FLOAT = 3,
explicit DataType(DataType&& other)
: id_(other.id_),
user_defined_type_name_(std::move(other.user_defined_type_name_)) {}

/// 8-byte floating point value
DOUBLE = 4,
inline DataType& operator=(const DataType& other) = default;

/// UTF8 variable-length string as List<Char>
STRING = 5,
bool Equals(const DataType& other) const {
return id_ == other.id_ &&
user_defined_type_name_ == other.user_defined_type_name_;
}

// Leave this at the end
MAX_ID = 6,
};
bool operator==(const DataType& other) const { return Equals(other); }

static std::shared_ptr<arrow::DataType> DataTypeToArrowDataType(
DataType::type type_id);
DataType type_id);

static DataType::type ArrowDataTypeToDataType(
static DataType ArrowDataTypeToDataType(
std::shared_ptr<arrow::DataType> type);

static DataType::type StringToDataType(const std::string& str) {
static const std::map<std::string, DataType::type> str2type{
{"bool", DataType::type::BOOL}, {"int32", DataType::type::INT32},
{"int64", DataType::type::INT64}, {"float", DataType::type::FLOAT},
{"double", DataType::type::DOUBLE}, {"string", DataType::type::STRING}};
try {
return str2type.at(str.c_str());
} catch (const std::exception& e) {
throw std::runtime_error("KeyError: " + str);
/// \brief Build a DataType from its type name.
///
/// \param str the type name, e.g. "int32".
/// \return the DataType of the matching built-in type; a name that is not one
///     of the built-in names yields a Type::USER_DEFINED DataType that carries
///     `str` as its user-defined type name.
static DataType TypeNameToDataType(const std::string& str) {
  static const std::map<std::string, Type> str2type{
      {"bool", Type::BOOL},     {"int32", Type::INT32},
      {"int64", Type::INT64},   {"float", Type::FLOAT},
      {"double", Type::DOUBLE}, {"string", Type::STRING}};

  // One lookup only: reuse the iterator instead of searching again with at()
  // (which also built a temporary std::string from c_str()).
  const auto it = str2type.find(str);
  if (it == str2type.end()) {
    return DataType(Type::USER_DEFINED, str);
  }
  return DataType(it->second);
}
static const char* DataTypeToString(DataType::type type) {
static const std::map<DataType::type, const char*> type2str{
{DataType::type::BOOL, "bool"}, {DataType::type::INT32, "int32"},
{DataType::type::INT64, "int64"}, {DataType::type::FLOAT, "float"},
{DataType::type::DOUBLE, "double"}, {DataType::type::STRING, "string"}};
return type2str.at(type);
}
}; // struct Type

/// \brief Return the type category of the DataType.
Type id() const { return id_; }

std::string ToTypeName() const;

private:
Type id_;
std::string user_defined_type_name_;
}; // class DataType
} // namespace GAR_NAMESPACE_INTERNAL

#endif // GAR_UTILS_DATA_TYPE_H_
1 change: 0 additions & 1 deletion include/gar/utils/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ limitations under the License.
#include "gar/utils/result.h"

#define REGULAR_SEPERATOR "_"
#define GAR_VERSION 1

namespace arrow {
class Array;
Expand Down
Loading