From f3edfd8a40242392414585e827d329bf3cacb0de Mon Sep 17 00:00:00 2001 From: acezen Date: Thu, 8 Dec 2022 15:54:14 +0800 Subject: [PATCH 01/11] Initialize Fix Fix Fix Fix Fix Fix Fix Fix Fix Update Update Update Update Update Update Update Update Update Update Update Update Update Update Update Update Fix Fix Fix Update update --- include/gar/graph_info.h | 32 +++-- include/gar/utils/convert_to_arrow_type.h | 14 +-- include/gar/utils/data_type.h | 84 ++++++++----- include/gar/utils/version_parser.h | 74 ++++++++++++ include/gar/writer/edges_builder.h | 4 +- include/gar/writer/vertices_builder.h | 4 +- src/arrow_chunk_writer.cc | 2 +- src/data_type.cc | 54 ++++++--- src/edges_builder.cc | 41 ++++--- src/graph.cc | 34 +++--- src/graph_info.cc | 35 ++++-- src/utils.cc | 1 - src/version_paser.cc | 110 ++++++++++++++++++ src/vertices_builder.cc | 35 +++--- test/test_example/test_bfs_father_example.cc | 7 +- test/test_example/test_bfs_pull_example.cc | 2 +- test/test_example/test_bfs_push_example.cc | 2 +- test/test_example/test_bfs_stream_example.cc | 2 +- test/test_example/test_bgl_example.cc | 5 +- test/test_example/test_cc_push_example.cc | 2 +- test/test_example/test_cc_stream_example.cc | 2 +- .../test_construct_info_example.cc | 17 +-- test/test_example/test_pagerank_example.cc | 2 +- test/test_info.cc | 30 +++-- 24 files changed, 436 insertions(+), 159 deletions(-) create mode 100644 include/gar/utils/version_parser.h create mode 100644 src/version_paser.cc diff --git a/include/gar/graph_info.h b/include/gar/graph_info.h index e7fb25e0d..c25611bb3 100644 --- a/include/gar/graph_info.h +++ b/include/gar/graph_info.h @@ -28,6 +28,7 @@ limitations under the License. #include "utils/status.h" #include "utils/utils.h" #include "utils/yaml.h" +#include "utils/version_parser.h" namespace GAR_NAMESPACE_INTERNAL { @@ -36,12 +37,12 @@ class Yaml; /// Property is a struct to store the property information. 
struct Property { std::string name; // property name - DataType::type type; // property data type + DataType type; // property data type bool is_primary; // primary key tag }; static bool operator==(const Property& lhs, const Property& rhs) { - return (lhs.name == rhs.name) && (lhs.type == rhs.type) && + return (lhs.name == rhs.name) && (lhs.type.Equals(rhs.type)) && (lhs.is_primary == rhs.is_primary); } @@ -122,8 +123,9 @@ class VertexInfo { * @param prefix prefix of the vertex info. */ explicit VertexInfo(const std::string& label, IdType chunk_size, + const InfoVersion& version, const std::string& prefix = "") - : label_(label), chunk_size_(chunk_size), prefix_(prefix) { + : label_(label), chunk_size_(chunk_size), version_(version), prefix_(prefix) { if (prefix_.empty()) { prefix_ = label_ + "/"; // default prefix } @@ -157,6 +159,10 @@ class VertexInfo { property_groups_.push_back(property_group); for (const auto& p : property_group.GetProperties()) { + if (!version_.CheckType(p.type.ToTypeName())) { + return Status::Invalid( + "The property type is not supported by the version."); + } p2type_[p.name] = p.type; p2primary_[p.name] = p.is_primary; p2group_index_[p.name] = property_groups_.size() - 1; @@ -188,7 +194,7 @@ class VertexInfo { } /// Get the data type of property - inline Result GetPropertyType( + inline Result GetPropertyType( const std::string& property_name) const noexcept { if (p2type_.find(property_name) == p2type_.end()) { return Status::KeyError("The property is not found."); @@ -285,9 +291,10 @@ class VertexInfo { private: std::string label_; IdType chunk_size_; + InfoVersion version_; std::string prefix_; std::vector property_groups_; - std::map p2type_; + std::map p2type_; std::map p2primary_; std::map p2group_index_; }; @@ -315,6 +322,7 @@ class EdgeInfo { explicit EdgeInfo(const std::string& src_label, const std::string& edge_label, const std::string& dst_label, IdType chunk_size, IdType src_chunk_size, IdType dst_chunk_size, bool directed, 
+ const InfoVersion& version, const std::string& prefix = "") : src_label_(src_label), edge_label_(edge_label), @@ -323,6 +331,7 @@ class EdgeInfo { src_chunk_size_(src_chunk_size), dst_chunk_size_(dst_chunk_size), directed_(directed), + version_(version), prefix_(prefix) { if (prefix_.empty()) { prefix_ = src_label_ + REGULAR_SEPERATOR + edge_label_ + @@ -391,6 +400,10 @@ class EdgeInfo { } adj_list2property_groups_[adj_list_type].push_back(property_group); for (auto& p : property_group.GetProperties()) { + if (!version_.CheckType(p.type.ToTypeName())) { + return Status::Invalid( + "The property type is not supported by the version."); + } p2type_[p.name] = p.type; p2primary_[p.name] = p.is_primary; p2group_index_[p.name][adj_list_type] = @@ -587,7 +600,7 @@ class EdgeInfo { } /// Get the data type of property - Result GetPropertyType(const std::string& property) const + Result GetPropertyType(const std::string& property) const noexcept { if (p2type_.find(property) == p2type_.end()) { return Status::KeyError("The property is not found."); @@ -687,8 +700,9 @@ class EdgeInfo { std::string dst_label_; IdType chunk_size_, src_chunk_size_, dst_chunk_size_; bool directed_; + InfoVersion version_; std::string prefix_; - std::map p2type_; + std::map p2type_; std::map p2primary_; std::map> p2group_index_; std::map adj_list2prefix_; @@ -707,8 +721,9 @@ class GraphInfo { * @param[in] prefix absolute path prefix to store chunk files of graph. */ explicit GraphInfo(const std::string& graph_name, + const InfoVersion& version, const std::string& prefix = "./") - : name_(graph_name), prefix_(prefix) {} + : name_(graph_name), version_(version), prefix_(prefix) {} /** * @brief Loads the input file as a GraphInfo instance. 
@@ -873,6 +888,7 @@ class GraphInfo { private: std::string name_; + InfoVersion version_; std::string prefix_; std::map vertex2info_; // label -> info std::map diff --git a/include/gar/utils/convert_to_arrow_type.h b/include/gar/utils/convert_to_arrow_type.h index 3fdaf94e4..34707a1cb 100644 --- a/include/gar/utils/convert_to_arrow_type.h +++ b/include/gar/utils/convert_to_arrow_type.h @@ -27,7 +27,7 @@ limitations under the License. namespace GAR_NAMESPACE_INTERNAL { /// \brief Struct to convert DataType to arrow::DataType. -template +template struct ConvertToArrowType {}; #define CONVERT_TO_ARROW_TYPE(type, c_type, arrow_type, array_type, \ @@ -42,22 +42,22 @@ struct ConvertToArrowType {}; static const char* type_to_string() { return str; } \ }; -CONVERT_TO_ARROW_TYPE(DataType::type::BOOL, bool, arrow::BooleanType, +CONVERT_TO_ARROW_TYPE(Type::BOOL, bool, arrow::BooleanType, arrow::BooleanArray, arrow::BooleanBuilder, arrow::boolean(), "boolean") -CONVERT_TO_ARROW_TYPE(DataType::type::INT32, int32_t, arrow::Int32Type, +CONVERT_TO_ARROW_TYPE(Type::INT32, int32_t, arrow::Int32Type, arrow::Int32Array, arrow::Int32Builder, arrow::int32(), "int32") -CONVERT_TO_ARROW_TYPE(DataType::type::INT64, int64_t, arrow::Int64Type, +CONVERT_TO_ARROW_TYPE(Type::INT64, int64_t, arrow::Int64Type, arrow::Int64Array, arrow::Int64Builder, arrow::int64(), "int64") -CONVERT_TO_ARROW_TYPE(DataType::type::FLOAT, float, arrow::FloatType, +CONVERT_TO_ARROW_TYPE(Type::FLOAT, float, arrow::FloatType, arrow::FloatArray, arrow::FloatBuilder, arrow::float32(), "float") -CONVERT_TO_ARROW_TYPE(DataType::type::DOUBLE, double, arrow::DoubleType, +CONVERT_TO_ARROW_TYPE(Type::DOUBLE, double, arrow::DoubleType, arrow::DoubleArray, arrow::DoubleBuilder, arrow::float64(), "double") -CONVERT_TO_ARROW_TYPE(DataType::type::STRING, std::string, arrow::StringType, +CONVERT_TO_ARROW_TYPE(Type::STRING, std::string, arrow::StringType, arrow::StringArray, arrow::StringBuilder, arrow::utf8(), "string") diff --git 
a/include/gar/utils/data_type.h b/include/gar/utils/data_type.h index 07a4676f7..64d6e3d2c 100644 --- a/include/gar/utils/data_type.h +++ b/include/gar/utils/data_type.h @@ -29,58 +29,80 @@ class DataType; namespace GAR_NAMESPACE_INTERNAL { -/// \brief The DataType struct to provide enum type for data type and functions -/// to parse data type. -struct DataType { - /// \brief Main data type enumeration - enum type { +/// \brief Main data type enumeration +enum class Type { /// Boolean as 1 bit, LSB bit-packed ordering BOOL = 0, /// Signed 32-bit little-endian integer - INT32 = 1, + INT32, /// Signed 64-bit little-endian integer - INT64 = 2, + INT64, /// 4-byte floating point value - FLOAT = 3, + FLOAT, /// 8-byte floating point value - DOUBLE = 4, + DOUBLE, /// UTF8 variable-length string as List - STRING = 5, + STRING, + + /// User-defined data type + USER_DEFINED, // Leave this at the end - MAX_ID = 6, - }; + MAX_ID, +}; + +/// \brief The DataType struct to provide enum type for data type and functions +/// to parse data type. 
+class DataType { + public: + DataType() : id_(Type::BOOL) {} + + explicit DataType(Type id, const std::string& user_defined_type_name = "") : id_(id), user_defined_type_name_(user_defined_type_name) {} + + DataType(const DataType& other) : id_(other.id_), user_defined_type_name_(other.user_defined_type_name_) {} + + explicit DataType(DataType&& other) : id_(other.id_), user_defined_type_name_(std::move(other.user_defined_type_name_)) {} + + inline DataType& operator=(const DataType& other) = default; + + bool Equals(const DataType& other) const { + return id_ == other.id_ && user_defined_type_name_ == other.user_defined_type_name_; + } + + bool operator==(const DataType& other) const { return Equals(other); } static std::shared_ptr DataTypeToArrowDataType( - DataType::type type_id); + DataType type_id); - static DataType::type ArrowDataTypeToDataType( + static DataType ArrowDataTypeToDataType( std::shared_ptr type); - static DataType::type StringToDataType(const std::string& str) { - static const std::map str2type{ - {"bool", DataType::type::BOOL}, {"int32", DataType::type::INT32}, - {"int64", DataType::type::INT64}, {"float", DataType::type::FLOAT}, - {"double", DataType::type::DOUBLE}, {"string", DataType::type::STRING}}; - try { - return str2type.at(str.c_str()); - } catch (const std::exception& e) { - throw std::runtime_error("KeyError: " + str); + static DataType TypeNameToDataType(const std::string& str) { + static const std::map str2type{ + {"bool", Type::BOOL}, {"int32", Type::INT32}, + {"int64", Type::INT64}, {"float", Type::FLOAT}, + {"double", Type::DOUBLE}, {"string", Type::STRING}}; + + if (str2type.find(str) == str2type.end()) { + return DataType(Type::USER_DEFINED, str); } + return DataType(str2type.at(str.c_str())); } - static const char* DataTypeToString(DataType::type type) { - static const std::map type2str{ - {DataType::type::BOOL, "bool"}, {DataType::type::INT32, "int32"}, - {DataType::type::INT64, "int64"}, {DataType::type::FLOAT, "float"}, - 
{DataType::type::DOUBLE, "double"}, {DataType::type::STRING, "string"}}; - return type2str.at(type); - } -}; // struct Type + + /// \brief Return the type category of the DataType. + Type id() const { return id_; } + + std::string ToTypeName() const; + + private: + Type id_; + std::string user_defined_type_name_; +}; // struct DataType } // namespace GAR_NAMESPACE_INTERNAL #endif // GAR_UTILS_DATA_TYPE_H_ diff --git a/include/gar/utils/version_parser.h b/include/gar/utils/version_parser.h new file mode 100644 index 000000000..f758bbd1e --- /dev/null +++ b/include/gar/utils/version_parser.h @@ -0,0 +1,74 @@ +/** Copyright 2022 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GAR_UTILS_VERSION_PARSER_H_ +#define GAR_UTILS_VERSION_PARSER_H_ + +#include +#include +#include +#include + +#include "gar/utils/result.h" + +namespace GAR_NAMESPACE_INTERNAL { + +/// \brief InfoVersion is a class provide version information of info. 
+class InfoVersion { + public: + // inline static const std::regex version_regex{"gar/v\\d+\\"}; + // inline static const std::regex user_define_types_regex{"\\(.*\\)"}; + static Result Parse(const std::string& str) noexcept; + + InfoVersion() : version_(version2types.rbegin()->first) {} + explicit InfoVersion(int version) : version_(version) {} + InfoVersion(const InfoVersion& other) = default; + inline InfoVersion& operator=(const InfoVersion& other) = default; + + std::string ToString() const { + std::string str = "gar/" + std::to_string(version_); + if (!user_define_types_.empty()) { + str += " ("; + for (auto& type : user_define_types_) { + str += type + ","; + } + str.back() = ')'; + } + return str; + } + + inline bool CheckType(const std::string& type_str) noexcept { + auto& types = version2types.at(version_); + // check if type_str is in supported types of version + if (std::find(types.begin(), types.end(), type_str) != types.end()) { + return true; + } + // check if type_str is in user defined types + if (std::find(user_define_types_.begin(), user_define_types_.end(), type_str) != user_define_types_.end()) { + return true; + } + return false; + } + + private: + inline static const std::map> version2types{ + {1, {"bool", "int32", "int64", "float", "double", "string"}}, + }; + int version_; + std::vector user_define_types_; +}; + +} // namespace GAR_NAMESPACE_INTERNAL +#endif // GAR_UTILS_VERSION_PARSER_H_ diff --git a/include/gar/writer/edges_builder.h b/include/gar/writer/edges_builder.h index caccaee36..0c48cbce7 100644 --- a/include/gar/writer/edges_builder.h +++ b/include/gar/writer/edges_builder.h @@ -303,7 +303,7 @@ class EdgesBuilder { * @param edges The edges of a specific vertex chunk. * @return Status: ok or Status::TypeError error. 
*/ - Status appendToArray(DataType::type type, const std::string& property_name, + Status appendToArray(const DataType& type, const std::string& property_name, std::shared_ptr& array, // NOLINT const std::vector& edges); @@ -317,7 +317,7 @@ class EdgesBuilder { * @param edges The edges of a specific vertex chunk. * @return Status: ok or Status::ArrowError error. */ - template + template Status tryToAppend(const std::string& property_name, std::shared_ptr& array, // NOLINT const std::vector& edges); diff --git a/include/gar/writer/vertices_builder.h b/include/gar/writer/vertices_builder.h index 60c79deff..fa06c4de0 100644 --- a/include/gar/writer/vertices_builder.h +++ b/include/gar/writer/vertices_builder.h @@ -230,7 +230,7 @@ class VerticesBuilder { * @param array The constructed array. * @return Status: ok or Status::TypeError error. */ - Status appendToArray(DataType::type type, const std::string& property_name, + Status appendToArray(const DataType& type, const std::string& property_name, std::shared_ptr& array); // NOLINT /** @@ -241,7 +241,7 @@ class VerticesBuilder { * @param array The array to append. * @return Status: ok or Status::ArrowError error. 
*/ - template + template Status tryToAppend(const std::string& property_name, std::shared_ptr& array); // NOLINT diff --git a/src/arrow_chunk_writer.cc b/src/arrow_chunk_writer.cc index ec2819c3f..d5bbda2ac 100644 --- a/src/arrow_chunk_writer.cc +++ b/src/arrow_chunk_writer.cc @@ -433,7 +433,7 @@ Result> EdgeChunkWriter::getOffsetTable( std::vector> schema_vector; std::string property = GeneralParams::kOffsetCol; schema_vector.push_back(arrow::field( - property, DataType::DataTypeToArrowDataType(DataType::type::INT64))); + property, DataType::DataTypeToArrowDataType(DataType(Type::INT64)))); int64_t global_index = 0; for (IdType i = begin_index; i < end_index; i++) { diff --git a/src/data_type.cc b/src/data_type.cc index f94f63fe4..c96677f79 100644 --- a/src/data_type.cc +++ b/src/data_type.cc @@ -23,43 +23,67 @@ limitations under the License. namespace GAR_NAMESPACE_INTERNAL { std::shared_ptr DataType::DataTypeToArrowDataType( - DataType::type type_id) { - switch (type_id) { - case DataType::type::BOOL: + DataType type) { + switch (type.id()) { + case Type::BOOL: return arrow::boolean(); - case DataType::type::INT32: + case Type::INT32: return arrow::int32(); - case DataType::type::INT64: + case Type::INT64: return arrow::int64(); - case DataType::type::FLOAT: + case Type::FLOAT: return arrow::float32(); - case DataType::type::DOUBLE: + case Type::DOUBLE: return arrow::float64(); - case DataType::type::STRING: + case Type::STRING: return arrow::utf8(); default: throw std::runtime_error("Unsupported data type"); } } -DataType::type DataType::ArrowDataTypeToDataType( +DataType DataType::ArrowDataTypeToDataType( std::shared_ptr type) { switch (type->id()) { case arrow::Type::BOOL: - return DataType::type::BOOL; + return DataType(Type::BOOL); case arrow::Type::INT32: - return DataType::type::INT32; + return DataType(Type::INT32); case arrow::Type::INT64: - return DataType::type::INT64; + return DataType(Type::INT64); case arrow::Type::FLOAT: - return 
DataType::type::FLOAT; + return DataType(Type::FLOAT); case arrow::Type::DOUBLE: - return DataType::type::DOUBLE; + return DataType(Type::DOUBLE); case arrow::Type::STRING: - return DataType::type::STRING; + return DataType(Type::STRING); default: throw std::runtime_error("Unsupported data type"); } } +std::string DataType::ToTypeName() const { + switch (id_) { +#define TO_STRING_CASE(_id) \ + case Type::_id: { \ + std::string name(GAR_STRINGIFY(_id)); \ + std::transform(name.begin(), name.end(), name.begin(), ::tolower); \ + return name; \ + } + + TO_STRING_CASE(BOOL) + TO_STRING_CASE(INT32) + TO_STRING_CASE(INT64) + TO_STRING_CASE(FLOAT) + TO_STRING_CASE(DOUBLE) + TO_STRING_CASE(STRING) + +#undef TO_STRING_CASE + case Type::USER_DEFINED: + return user_defined_type_name_; + default: + return "unknown"; + } +} + } // namespace GAR_NAMESPACE_INTERNAL diff --git a/src/edges_builder.cc b/src/edges_builder.cc index f405fb6ee..750de9b48 100644 --- a/src/edges_builder.cc +++ b/src/edges_builder.cc @@ -21,29 +21,29 @@ namespace GAR_NAMESPACE_INTERNAL { namespace builder { Status EdgesBuilder::appendToArray( - DataType::type type, const std::string& property_name, + const DataType& type, const std::string& property_name, std::shared_ptr& array, // NOLINT const std::vector& edges) { - switch (type) { - case DataType::type::BOOL: - return tryToAppend(property_name, array, edges); - case DataType::type::INT32: - return tryToAppend(property_name, array, edges); - case DataType::type::INT64: - return tryToAppend(property_name, array, edges); - case DataType::type::FLOAT: - return tryToAppend(property_name, array, edges); - case DataType::type::DOUBLE: - return tryToAppend(property_name, array, edges); - case DataType::type::STRING: - return tryToAppend(property_name, array, edges); + switch (type.id()) { + case Type::BOOL: + return tryToAppend(property_name, array, edges); + case Type::INT32: + return tryToAppend(property_name, array, edges); + case Type::INT64: + return 
tryToAppend(property_name, array, edges); + case Type::FLOAT: + return tryToAppend(property_name, array, edges); + case Type::DOUBLE: + return tryToAppend(property_name, array, edges); + case Type::STRING: + return tryToAppend(property_name, array, edges); default: return Status::TypeError(); } return Status::TypeError(); } -template +template Status EdgesBuilder::tryToAppend( const std::string& property_name, std::shared_ptr& array, // NOLINT @@ -93,25 +93,24 @@ Result> EdgesBuilder::convertToTable( std::shared_ptr array; schema_vector.push_back( arrow::field(GeneralParams::kSrcIndexCol, - DataType::DataTypeToArrowDataType(DataType::type::INT64))); + DataType::DataTypeToArrowDataType(DataType(Type::INT64)))); GAR_RETURN_NOT_OK(tryToAppend(1, array, edges)); arrays.push_back(array); // add dst schema_vector.push_back( arrow::field(GeneralParams::kDstIndexCol, - DataType::DataTypeToArrowDataType(DataType::type::INT64))); + DataType::DataTypeToArrowDataType(DataType(Type::INT64)))); GAR_RETURN_NOT_OK(tryToAppend(0, array, edges)); arrays.push_back(array); // add properties for (auto& property_group : property_groups) { for (auto& property : property_group.GetProperties()) { // add a column to schema - DataType::type type = property.type; schema_vector.push_back( - arrow::field(property.name, DataType::DataTypeToArrowDataType(type))); + arrow::field(property.name, DataType::DataTypeToArrowDataType(property.type))); // add a column to data std::shared_ptr array; - GAR_RETURN_NOT_OK(appendToArray(type, property.name, array, edges)); + GAR_RETURN_NOT_OK(appendToArray(property.type, property.name, array, edges)); arrays.push_back(array); } } @@ -130,7 +129,7 @@ Result> EdgesBuilder::getOffsetTable( std::vector> schema_vector; schema_vector.push_back( arrow::field(GeneralParams::kOffsetCol, - DataType::DataTypeToArrowDataType(DataType::type::INT64))); + DataType::DataTypeToArrowDataType(DataType(Type::INT64)))); size_t index = 0; for (IdType i = begin_index; i < end_index; 
i++) { diff --git a/src/graph.cc b/src/graph.cc index d9374a862..1706a71e9 100644 --- a/src/graph.cc +++ b/src/graph.cc @@ -18,7 +18,7 @@ limitations under the License. namespace GAR_NAMESPACE_INTERNAL { -template +template Status CastToAny(std::shared_ptr array, std::any& any) { // NOLINT using ArrayType = typename ConvertToArrowType::ArrayType; @@ -28,30 +28,30 @@ Status CastToAny(std::shared_ptr array, } template <> -Status CastToAny(std::shared_ptr array, +Status CastToAny(std::shared_ptr array, std::any& any) { // NOLINT using ArrayType = - typename ConvertToArrowType::ArrayType; + typename ConvertToArrowType::ArrayType; auto column = std::dynamic_pointer_cast(array); any = column->GetString(0); return Status::OK(); } -Status TryToCastToAny(DataType::type type, std::shared_ptr array, +Status TryToCastToAny(const DataType& type, std::shared_ptr array, std::any& any) { // NOLINT - switch (type) { - case DataType::type::BOOL: - return CastToAny(array, any); - case DataType::type::INT32: - return CastToAny(array, any); - case DataType::type::INT64: - return CastToAny(array, any); - case DataType::type::FLOAT: - return CastToAny(array, any); - case DataType::type::DOUBLE: - return CastToAny(array, any); - case DataType::type::STRING: - return CastToAny(array, any); + switch (type.id()) { + case Type::BOOL: + return CastToAny(array, any); + case Type::INT32: + return CastToAny(array, any); + case Type::INT64: + return CastToAny(array, any); + case Type::FLOAT: + return CastToAny(array, any); + case Type::DOUBLE: + return CastToAny(array, any); + case Type::STRING: + return CastToAny(array, any); default: return Status::TypeError(); } diff --git a/src/graph_info.cc b/src/graph_info.cc index 132ef935e..a0ae644fa 100644 --- a/src/graph_info.cc +++ b/src/graph_info.cc @@ -34,7 +34,12 @@ Result VertexInfo::Load(std::shared_ptr yaml) { if (yaml->operator[]("prefix")) { prefix = yaml->operator[]("prefix").as(); } - VertexInfo vertex_info(label, chunk_size, prefix); + 
InfoVersion version; + if (yaml->operator[]("version")) { + GAR_ASSIGN_OR_RAISE(version, + InfoVersion::Parse(yaml->operator[]("version").as())); + } + VertexInfo vertex_info(label, chunk_size, version, prefix); auto property_groups = yaml->operator[]("property_groups"); if (property_groups) { // property_groups exist for (YAML::const_iterator it = property_groups.begin(); @@ -51,7 +56,7 @@ Result VertexInfo::Load(std::shared_ptr yaml) { iit != properties.end(); ++iit) { Property property; property.name = iit->operator[]("name").as(); - property.type = DataType::StringToDataType( + property.type = DataType::TypeNameToDataType( iit->operator[]("data_type").as()); property.is_primary = iit->operator[]("is_primary").as(); property_vec.push_back(property); @@ -80,13 +85,13 @@ Result VertexInfo::Dump() const noexcept { for (auto& p : pg.GetProperties()) { YAML::Node p_node; p_node["name"] = p.name; - p_node["data_type"] = DataType::DataTypeToString(p.type); + p_node["data_type"] = p.type.ToTypeName(); p_node["is_primary"] = p.is_primary; pg_node["properties"].push_back(p_node); } node["property_groups"].push_back(pg_node); } - node["version"] = GAR_VERSION; + node["version"] = version_.ToString(); return YAML::Dump(node); } @@ -115,9 +120,14 @@ Result EdgeInfo::Load(std::shared_ptr yaml) { if (yaml->operator[]("prefix")) { prefix = yaml->operator[]("prefix").as(); } + InfoVersion version; + if (yaml->operator[]("version")) { + GAR_ASSIGN_OR_RAISE(version, + InfoVersion::Parse(yaml->operator[]("version").as())); + } EdgeInfo edge_info(src_label, edge_label, dst_label, chunk_size, - src_chunk_size, dst_chunk_size, directed, prefix); + src_chunk_size, dst_chunk_size, directed, version, prefix); auto adj_lists = yaml->operator[]("adj_lists"); if (adj_lists) { @@ -151,7 +161,7 @@ Result EdgeInfo::Load(std::shared_ptr yaml) { p_it != properties.end(); ++p_it) { Property property; property.name = p_it->operator[]("name").as(); - property.type = DataType::StringToDataType( + 
property.type = DataType::TypeNameToDataType( p_it->operator[]("data_type").as()); property.is_primary = p_it->operator[]("is_primary").as(); property_vec.push_back(property); @@ -196,7 +206,7 @@ Result EdgeInfo::Dump() const noexcept { for (auto& p : pg.GetProperties()) { YAML::Node p_node; p_node["name"] = p.name; - p_node["data_type"] = DataType::DataTypeToString(p.type); + p_node["data_type"] = p.type.ToTypeName(); p_node["is_primary"] = p.is_primary; pg_node["properties"].push_back(p_node); } @@ -204,7 +214,7 @@ Result EdgeInfo::Dump() const noexcept { } node["adj_lists"].push_back(adj_list_node); } - node["version"] = GAR_VERSION; + node["version"] = version_.ToString(); return YAML::Dump(node); } @@ -244,7 +254,12 @@ Result GraphInfo::Load(const std::string& input, if (graph_meta->operator[]("prefix")) { prefix = graph_meta->operator[]("prefix").as(); } - GraphInfo graph_info(name, prefix); + InfoVersion version; + if (graph_meta->operator[]("version")) { + GAR_ASSIGN_OR_RAISE(version, + InfoVersion::Parse(graph_meta->operator[]("version").as())); + } + GraphInfo graph_info(name, version, prefix); std::string no_url_path; GAR_ASSIGN_OR_RAISE(auto fs, @@ -290,7 +305,7 @@ Result GraphInfo::Dump() const noexcept { for (auto& path : edge_paths_) { node["edges"].push_back(path); } - node["version"] = GAR_VERSION; + node["version"] = version_.ToString(); return YAML::Dump(node); } diff --git a/src/utils.cc b/src/utils.cc index 61f8df7e3..69a9d9454 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -86,5 +86,4 @@ std::string ValueGetter::Value(const void* data, int64_t offset) { } } // namespace util - } // namespace GAR_NAMESPACE_INTERNAL diff --git a/src/version_paser.cc b/src/version_paser.cc new file mode 100644 index 000000000..3d7d2b40f --- /dev/null +++ b/src/version_paser.cc @@ -0,0 +1,110 @@ +/** Copyright 2022 Alibaba Group Holding Limited. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include +#include +#include + +#include "gar/utils/version_parser.h" + +namespace GAR_NAMESPACE_INTERNAL { + +// Helper function for parsing version string +bool is_whitespace(char ch) { + return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; +} + +void trim(std::string& s) { + size_t trim_left = 0; + for (auto it = s.begin(); it != s.end(); ++it) { + if (!is_whitespace(*it)) { + break; + } + ++trim_left; + } + + if (trim_left == s.size()) { + s.clear(); + } else { + size_t trim_right = 0; + for (auto it = s.rbegin(); it != s.rend(); ++it) { + if (!is_whitespace(*it)) { + break; + } + ++trim_right; + } + + if (trim_left > 0 || trim_right > 0) { + if (trim_left == 0) { + s.resize(s.size() - trim_right); + } else { + std::string copy(s.c_str() + trim_left, s.size() - trim_left - trim_right); + s.swap(copy); + } + } + } +} + +int parserVersionImpl(const std::string& version_str) { + std::smatch match; + const std::regex version_regex("gar/v(\\d+).*"); + if (std::regex_match(version_str, match, version_regex)) { + if (match.size() != 2) { + throw std::runtime_error("Invalid version string: " + version_str); + } + return std::stoi(match[1].str()); + } else { + throw std::runtime_error("Invalid version string: " + version_str); + } +} + +std::vector parseUserDefineTypesImpl(const std::string& version_str) { + std::smatch match; + std::vector user_define_types; + const std::regex user_define_types_regex("gar/v\\d+ *\\((.*)\\).*"); + 
if (std::regex_match(version_str, match, user_define_types_regex)) { + if (match.size() != 2) { + throw std::runtime_error("Invalid version string: " + version_str); + } + std::string types_str = match[1].str(); + size_t pos = 0; + while (pos != std::string::npos) { + size_t next_pos = types_str.find(',', pos); + std::string type = types_str.substr(pos, next_pos - pos); + trim(type); + if (!type.empty()) { + user_define_types.push_back(type); + } + if (next_pos != std::string::npos) { + pos = next_pos + 1; + } else { + pos = next_pos; + } + } + } + return user_define_types; +} + +Result InfoVersion::Parse(const std::string& version_str) noexcept { + InfoVersion version; + try { + version.version_ = parserVersionImpl(version_str); + version.user_define_types_ = parseUserDefineTypesImpl(version_str); + } catch (const std::exception& e) { + return Status::Invalid("Invalid version string: " + version_str); + } + return version; +} +} // namespace GAR_NAMESPACE_INTERNAL diff --git a/src/vertices_builder.cc b/src/vertices_builder.cc index afd0fb521..fdf00a480 100644 --- a/src/vertices_builder.cc +++ b/src/vertices_builder.cc @@ -20,28 +20,28 @@ namespace GAR_NAMESPACE_INTERNAL { namespace builder { Status VerticesBuilder::appendToArray( - DataType::type type, const std::string& property_name, + const DataType& type, const std::string& property_name, std::shared_ptr& array) { // NOLINT - switch (type) { - case DataType::type::BOOL: - return tryToAppend(property_name, array); - case DataType::type::INT32: - return tryToAppend(property_name, array); - case DataType::type::INT64: - return tryToAppend(property_name, array); - case DataType::type::FLOAT: - return tryToAppend(property_name, array); - case DataType::type::DOUBLE: - return tryToAppend(property_name, array); - case DataType::type::STRING: - return tryToAppend(property_name, array); + switch (type.id()) { + case Type::BOOL: + return tryToAppend(property_name, array); + case Type::INT32: + return 
tryToAppend(property_name, array); + case Type::INT64: + return tryToAppend(property_name, array); + case Type::FLOAT: + return tryToAppend(property_name, array); + case Type::DOUBLE: + return tryToAppend(property_name, array); + case Type::STRING: + return tryToAppend(property_name, array); default: return Status::TypeError(); } return Status::TypeError(); } -template +template Status VerticesBuilder::tryToAppend( const std::string& property_name, std::shared_ptr& array) { // NOLINT @@ -71,12 +71,11 @@ Result> VerticesBuilder::convertToTable() { for (auto& property_group : property_groups) { for (auto& property : property_group.GetProperties()) { // add a column to schema - DataType::type type = property.type; schema_vector.push_back( - arrow::field(property.name, DataType::DataTypeToArrowDataType(type))); + arrow::field(property.name, DataType::DataTypeToArrowDataType(property.type))); // add a column to data std::shared_ptr array; - appendToArray(type, property.name, array); + appendToArray(property.type, property.name, array); arrays.push_back(array); } } diff --git a/test/test_example/test_bfs_father_example.cc b/test/test_example/test_bfs_father_example.cc index 3c53ab2ef..36e3f0e9a 100644 --- a/test/test_example/test_bfs_father_example.cc +++ b/test/test_example/test_bfs_father_example.cc @@ -83,8 +83,8 @@ TEST_CASE("test_bfs_with_father_example") { // Append the bfs result to the vertex info as a property group // and write to file // construct property group - GAR_NAMESPACE::Property bfs = {"bfs", GAR_NAMESPACE::DataType::INT32, false}; - GAR_NAMESPACE::Property father = {"father", GAR_NAMESPACE::DataType::INT64, + GAR_NAMESPACE::Property bfs = {"bfs", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; + GAR_NAMESPACE::Property father = {"father", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), false}; std::vector property_vector = {bfs, father}; GAR_NAMESPACE::PropertyGroup group(property_vector, @@ -143,9 +143,10 @@ 
TEST_CASE("test_bfs_with_father_example") { dst_label = "person"; int edge_chunk_size = 1024, src_chunk_size = 100, dst_chunk_size = 100; bool directed = true; + GAR_NAMESPACE::InfoVersion version(1); GAR_NAMESPACE::EdgeInfo new_edge_info(src_label, edge_label, dst_label, edge_chunk_size, src_chunk_size, - dst_chunk_size, directed); + dst_chunk_size, directed, version); REQUIRE(new_edge_info .AddAdjList(GAR_NAMESPACE::AdjListType::ordered_by_source, GAR_NAMESPACE::FileType::CSV) diff --git a/test/test_example/test_bfs_pull_example.cc b/test/test_example/test_bfs_pull_example.cc index dd1db6d62..bcd800270 100644 --- a/test/test_example/test_bfs_pull_example.cc +++ b/test/test_example/test_bfs_pull_example.cc @@ -85,7 +85,7 @@ TEST_CASE("test_bfs_using_pull_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property bfs = {"bfs-pull", GAR_NAMESPACE::DataType::INT32, + GAR_NAMESPACE::Property bfs = {"bfs-pull", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; std::vector property_vector = {bfs}; GAR_NAMESPACE::PropertyGroup group(property_vector, diff --git a/test/test_example/test_bfs_push_example.cc b/test/test_example/test_bfs_push_example.cc index 7ffe852f6..cc78e301a 100644 --- a/test/test_example/test_bfs_push_example.cc +++ b/test/test_example/test_bfs_push_example.cc @@ -84,7 +84,7 @@ TEST_CASE("test_bfs_using_push_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property bfs = {"bfs-push", GAR_NAMESPACE::DataType::INT32, + GAR_NAMESPACE::Property bfs = {"bfs-push", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; std::vector property_vector = {bfs}; GAR_NAMESPACE::PropertyGroup group(property_vector, diff --git a/test/test_example/test_bfs_stream_example.cc b/test/test_example/test_bfs_stream_example.cc index 0de8052ad..63726a013 100644 --- 
a/test/test_example/test_bfs_stream_example.cc +++ b/test/test_example/test_bfs_stream_example.cc @@ -75,7 +75,7 @@ TEST_CASE("test_bfs_using_stream_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property bfs = {"bfs-stream", GAR_NAMESPACE::DataType::INT32, + GAR_NAMESPACE::Property bfs = {"bfs-stream", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; std::vector property_vector = {bfs}; GAR_NAMESPACE::PropertyGroup group(property_vector, diff --git a/test/test_example/test_bgl_example.cc b/test/test_example/test_bgl_example.cc index edb4e6a3c..2b950eca3 100644 --- a/test/test_example/test_bgl_example.cc +++ b/test/test_example/test_bgl_example.cc @@ -106,14 +106,15 @@ TEST_CASE("test_bgl_cc_example") { // method 1 for writing results: construct new vertex type and write results // using vertex builder construct new property group - GAR_NAMESPACE::Property cc = {"cc", GAR_NAMESPACE::DataType::INT32, false}; + GAR_NAMESPACE::Property cc = {"cc", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; std::vector property_vector = {cc}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); // construct new vertex info std::string vertex_label = "cc_result", vertex_prefix = "result/"; int chunk_size = 100; - GAR_NAMESPACE::VertexInfo new_info(vertex_label, chunk_size, vertex_prefix); + GAR_NAMESPACE::InfoVersion version(1); + GAR_NAMESPACE::VertexInfo new_info(vertex_label, chunk_size, version, vertex_prefix); REQUIRE(new_info.AddPropertyGroup(group).ok()); // dump new vertex info REQUIRE(new_info.IsValidated()); diff --git a/test/test_example/test_cc_push_example.cc b/test/test_example/test_cc_push_example.cc index 4196a260c..a4ca0663c 100644 --- a/test/test_example/test_cc_push_example.cc +++ b/test/test_example/test_cc_push_example.cc @@ -119,7 +119,7 @@ TEST_CASE("test_cc_using_push_example") { // extend the original vertex info and 
write results to gar using writer // construct property group - GAR_NAMESPACE::Property cc = {"cc-push", GAR_NAMESPACE::DataType::INT64, + GAR_NAMESPACE::Property cc = {"cc-push", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), false}; std::vector property_vector = {cc}; GAR_NAMESPACE::PropertyGroup group(property_vector, diff --git a/test/test_example/test_cc_stream_example.cc b/test/test_example/test_cc_stream_example.cc index 38f46acc0..7b2eb09cd 100644 --- a/test/test_example/test_cc_stream_example.cc +++ b/test/test_example/test_cc_stream_example.cc @@ -86,7 +86,7 @@ TEST_CASE("test_cc_using_stream_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property cc = {"cc", GAR_NAMESPACE::DataType::INT64, false}; + GAR_NAMESPACE::Property cc = {"cc", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), false}; std::vector property_vector = {cc}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_example/test_construct_info_example.cc b/test/test_example/test_construct_info_example.cc index e1fb19e3d..807b06f7b 100644 --- a/test/test_example/test_construct_info_example.cc +++ b/test/test_example/test_construct_info_example.cc @@ -21,7 +21,8 @@ limitations under the License. 
TEST_CASE("test_construct_info_example") { /*------------------construct graph info------------------*/ std::string name = "graph", prefix = "file:///tmp/"; - GAR_NAMESPACE::GraphInfo graph_info(name, prefix); + GAR_NAMESPACE::InfoVersion version(1); + GAR_NAMESPACE::GraphInfo graph_info(name, version, prefix); // validate REQUIRE(graph_info.GetName() == name); REQUIRE(graph_info.GetPrefix() == prefix); @@ -33,7 +34,7 @@ TEST_CASE("test_construct_info_example") { /*------------------construct vertex info------------------*/ std::string vertex_label = "person", vertex_prefix = "vertex/person/"; int chunk_size = 100; - GAR_NAMESPACE::VertexInfo vertex_info(vertex_label, chunk_size, + GAR_NAMESPACE::VertexInfo vertex_info(vertex_label, chunk_size, version, vertex_prefix); // validate REQUIRE(vertex_info.GetLabel() == vertex_label); @@ -41,12 +42,12 @@ TEST_CASE("test_construct_info_example") { REQUIRE(vertex_info.GetPropertyGroups().size() == 0); // construct properties and property groups - GAR_NAMESPACE::Property id = {"id", GAR_NAMESPACE::DataType::INT32, true}; + GAR_NAMESPACE::Property id = {"id", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), true}; GAR_NAMESPACE::Property firstName = {"firstName", - GAR_NAMESPACE::DataType::STRING, false}; + GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; GAR_NAMESPACE::Property lastName = {"lastName", - GAR_NAMESPACE::DataType::STRING, false}; - GAR_NAMESPACE::Property gender = {"gender", GAR_NAMESPACE::DataType::STRING, + GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; + GAR_NAMESPACE::Property gender = {"gender", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; std::vector property_vector_1 = {id}, property_vector_2 = {firstName, lastName, @@ -101,7 +102,7 @@ TEST_CASE("test_construct_info_example") { bool directed = false; GAR_NAMESPACE::EdgeInfo edge_info(src_label, edge_label, dst_label, edge_chunk_size, src_chunk_size, - dst_chunk_size, directed, edge_prefix); + 
dst_chunk_size, directed, version, edge_prefix); REQUIRE(edge_info.GetSrcLabel() == src_label); REQUIRE(edge_info.GetEdgeLabel() == edge_label); REQUIRE(edge_info.GetDstLabel() == dst_label); @@ -139,7 +140,7 @@ TEST_CASE("test_construct_info_example") { // add property group & validate GAR_NAMESPACE::Property creationDate = { - "creationDate", GAR_NAMESPACE::DataType::STRING, false}; + "creationDate", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; std::vector property_vector_3 = {creationDate}; GAR_NAMESPACE::PropertyGroup group3(property_vector_3, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_example/test_pagerank_example.cc b/test/test_example/test_pagerank_example.cc index a66f3ed66..25ad776a8 100644 --- a/test/test_example/test_pagerank_example.cc +++ b/test/test_example/test_pagerank_example.cc @@ -86,7 +86,7 @@ TEST_CASE("test_pagerank_example") { // extend the original vertex info and write results to gar using writer // construct property group GAR_NAMESPACE::Property pagerank = {"pagerank", - GAR_NAMESPACE::DataType::DOUBLE, false}; + GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::DOUBLE), false}; std::vector property_vector = {pagerank}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_info.cc b/test/test_info.cc index f82d3b592..6037bb6b7 100644 --- a/test/test_info.cc +++ b/test/test_info.cc @@ -21,6 +21,7 @@ limitations under the License. #include "./config.h" #include "gar/graph_info.h" +#include "gar/utils/version_parser.h" #define CATCH_CONFIG_MAIN #include @@ -28,13 +29,14 @@ limitations under the License. 
TEST_CASE("test_graph_info") { std::string graph_name = "test_graph"; std::string prefix = "test_prefix"; - GAR_NAMESPACE::GraphInfo graph_info(graph_name, prefix); + GAR_NAMESPACE::InfoVersion version(1); + GAR_NAMESPACE::GraphInfo graph_info(graph_name, version, prefix); REQUIRE(graph_info.GetName() == graph_name); REQUIRE(graph_info.GetPrefix() == prefix); // test add vertex and get vertex info REQUIRE(graph_info.GetAllVertexInfo().size() == 0); - GAR_NAMESPACE::VertexInfo vertex_info("test_vertex", 100, + GAR_NAMESPACE::VertexInfo vertex_info("test_vertex", 100, version, "test_vertex_prefix"); auto st = graph_info.AddVertex(vertex_info); REQUIRE(st.ok()); @@ -52,7 +54,7 @@ TEST_CASE("test_graph_info") { std::string src_label = "test_vertex", edge_label = "test_edge", dst_label = "test_vertex"; GAR_NAMESPACE::EdgeInfo edge_info(src_label, edge_label, dst_label, 1024, 100, - 100, true); + 100, true, version); st = graph_info.AddEdge(edge_info); REQUIRE(st.ok()); REQUIRE(graph_info.GetAllEdgeInfo().size() == 1); @@ -80,7 +82,8 @@ TEST_CASE("test_graph_info") { TEST_CASE("test_vertex_info") { std::string label = "test_vertex"; int chunk_size = 100; - GAR_NAMESPACE::VertexInfo v_info(label, chunk_size); + GAR_NAMESPACE::InfoVersion version(1); + GAR_NAMESPACE::VertexInfo v_info(label, chunk_size, version); REQUIRE(v_info.GetLabel() == label); REQUIRE(v_info.GetChunkSize() == chunk_size); REQUIRE(v_info.GetPrefix() == label + "/"); // default prefix is label + "/" @@ -88,7 +91,7 @@ TEST_CASE("test_vertex_info") { // test add property group GAR_NAMESPACE::Property p; p.name = "id"; - p.type = GAR_NAMESPACE::DataType::INT32; + p.type = GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32); p.is_primary = true; GAR_NAMESPACE::PropertyGroup pg({p}, GAR_NAMESPACE::FileType::CSV); REQUIRE(v_info.GetPropertyGroups().size() == 0); @@ -145,9 +148,10 @@ TEST_CASE("test_edge_info") { int src_chunk_size = 100; int dst_chunk_size = 100; bool directed = true; + 
GAR_NAMESPACE::InfoVersion version(1); GAR_NAMESPACE::EdgeInfo edge_info(src_label, edge_label, dst_label, chunk_size, src_chunk_size, dst_chunk_size, - directed); + directed, version); REQUIRE(edge_info.GetSrcLabel() == src_label); REQUIRE(edge_info.GetEdgeLabel() == edge_label); REQUIRE(edge_info.GetDstLabel() == dst_label); @@ -215,7 +219,7 @@ TEST_CASE("test_edge_info") { GAR_NAMESPACE::Property p; p.name = "creationDate"; - p.type = GAR_NAMESPACE::DataType::STRING; + p.type = GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING); p.is_primary = false; GAR_NAMESPACE::PropertyGroup pg({p}, file_type); @@ -299,3 +303,15 @@ TEST_CASE("test_graph_info_load_from_file") { REQUIRE(vertex_infos.size() == 1); REQUIRE(edge_infos.size() == 1); } + +TEST_CASE("test_info_version") { + std::string version_str1 = "gar/v1"; + std::cout << version_str1 << std::endl; + GAR_NAMESPACE::InfoVersion::Parse(version_str1); + std::string version_str2 = "gar/v2"; + std::cout << version_str2 << std::endl; + GAR_NAMESPACE::InfoVersion::Parse(version_str2); + std::string version_str3 = "gar/v3 (udd1, udd2)"; + std::cout << version_str3 << std::endl; + GAR_NAMESPACE::InfoVersion::Parse(version_str3); +} From a29104de37c8aede98274843994011164b8290bb Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 09:50:54 +0800 Subject: [PATCH 02/11] Format and code lint --- include/gar/graph_info.h | 22 ++++----- include/gar/utils/convert_to_arrow_type.h | 20 ++++---- include/gar/utils/data_type.h | 47 +++++++++++-------- include/gar/utils/version_parser.h | 5 +- src/data_type.cc | 16 +++---- src/edges_builder.cc | 7 +-- src/graph.cc | 5 +- src/graph_info.cc | 15 +++--- src/{version_paser.cc => version_parser.cc} | 13 +++-- src/vertices_builder.cc | 4 +- test/test_example/test_bfs_father_example.cc | 7 +-- test/test_example/test_bfs_pull_example.cc | 4 +- test/test_example/test_bfs_push_example.cc | 4 +- test/test_example/test_bfs_stream_example.cc | 4 +- test/test_example/test_bgl_example.cc | 
6 ++- test/test_example/test_cc_push_example.cc | 4 +- test/test_example/test_cc_stream_example.cc | 3 +- .../test_construct_info_example.cc | 24 +++++----- test/test_example/test_pagerank_example.cc | 4 +- test/test_info.cc | 18 +++---- 20 files changed, 124 insertions(+), 108 deletions(-) rename src/{version_paser.cc => version_parser.cc} (89%) diff --git a/include/gar/graph_info.h b/include/gar/graph_info.h index c25611bb3..a753cd101 100644 --- a/include/gar/graph_info.h +++ b/include/gar/graph_info.h @@ -27,8 +27,8 @@ limitations under the License. #include "utils/result.h" #include "utils/status.h" #include "utils/utils.h" -#include "utils/yaml.h" #include "utils/version_parser.h" +#include "utils/yaml.h" namespace GAR_NAMESPACE_INTERNAL { @@ -36,9 +36,9 @@ class Yaml; /// Property is a struct to store the property information. struct Property { - std::string name; // property name - DataType type; // property data type - bool is_primary; // primary key tag + std::string name; // property name + DataType type; // property data type + bool is_primary; // primary key tag }; static bool operator==(const Property& lhs, const Property& rhs) { @@ -125,7 +125,10 @@ class VertexInfo { explicit VertexInfo(const std::string& label, IdType chunk_size, const InfoVersion& version, const std::string& prefix = "") - : label_(label), chunk_size_(chunk_size), version_(version), prefix_(prefix) { + : label_(label), + chunk_size_(chunk_size), + version_(version), + prefix_(prefix) { if (prefix_.empty()) { prefix_ = label_ + "/"; // default prefix } @@ -322,8 +325,7 @@ class EdgeInfo { explicit EdgeInfo(const std::string& src_label, const std::string& edge_label, const std::string& dst_label, IdType chunk_size, IdType src_chunk_size, IdType dst_chunk_size, bool directed, - const InfoVersion& version, - const std::string& prefix = "") + const InfoVersion& version, const std::string& prefix = "") : src_label_(src_label), edge_label_(edge_label), dst_label_(dst_label), @@ -600,8 
+602,7 @@ class EdgeInfo { } /// Get the data type of property - Result GetPropertyType(const std::string& property) const - noexcept { + Result GetPropertyType(const std::string& property) const noexcept { if (p2type_.find(property) == p2type_.end()) { return Status::KeyError("The property is not found."); } @@ -720,8 +721,7 @@ class GraphInfo { * @param[in] graph_name name of graph * @param[in] prefix absolute path prefix to store chunk files of graph. */ - explicit GraphInfo(const std::string& graph_name, - const InfoVersion& version, + explicit GraphInfo(const std::string& graph_name, const InfoVersion& version, const std::string& prefix = "./") : name_(graph_name), version_(version), prefix_(prefix) {} diff --git a/include/gar/utils/convert_to_arrow_type.h b/include/gar/utils/convert_to_arrow_type.h index 34707a1cb..b4e3a7f24 100644 --- a/include/gar/utils/convert_to_arrow_type.h +++ b/include/gar/utils/convert_to_arrow_type.h @@ -42,18 +42,14 @@ struct ConvertToArrowType {}; static const char* type_to_string() { return str; } \ }; -CONVERT_TO_ARROW_TYPE(Type::BOOL, bool, arrow::BooleanType, - arrow::BooleanArray, arrow::BooleanBuilder, - arrow::boolean(), "boolean") -CONVERT_TO_ARROW_TYPE(Type::INT32, int32_t, arrow::Int32Type, - arrow::Int32Array, arrow::Int32Builder, arrow::int32(), - "int32") -CONVERT_TO_ARROW_TYPE(Type::INT64, int64_t, arrow::Int64Type, - arrow::Int64Array, arrow::Int64Builder, arrow::int64(), - "int64") -CONVERT_TO_ARROW_TYPE(Type::FLOAT, float, arrow::FloatType, - arrow::FloatArray, arrow::FloatBuilder, arrow::float32(), - "float") +CONVERT_TO_ARROW_TYPE(Type::BOOL, bool, arrow::BooleanType, arrow::BooleanArray, + arrow::BooleanBuilder, arrow::boolean(), "boolean") +CONVERT_TO_ARROW_TYPE(Type::INT32, int32_t, arrow::Int32Type, arrow::Int32Array, + arrow::Int32Builder, arrow::int32(), "int32") +CONVERT_TO_ARROW_TYPE(Type::INT64, int64_t, arrow::Int64Type, arrow::Int64Array, + arrow::Int64Builder, arrow::int64(), "int64") 
+CONVERT_TO_ARROW_TYPE(Type::FLOAT, float, arrow::FloatType, arrow::FloatArray, + arrow::FloatBuilder, arrow::float32(), "float") CONVERT_TO_ARROW_TYPE(Type::DOUBLE, double, arrow::DoubleType, arrow::DoubleArray, arrow::DoubleBuilder, arrow::float64(), "double") diff --git a/include/gar/utils/data_type.h b/include/gar/utils/data_type.h index 64d6e3d2c..4ee9b5dcb 100644 --- a/include/gar/utils/data_type.h +++ b/include/gar/utils/data_type.h @@ -19,6 +19,7 @@ limitations under the License. #include #include #include +#include #include "gar/utils/macros.h" @@ -31,29 +32,29 @@ namespace GAR_NAMESPACE_INTERNAL { /// \brief Main data type enumeration enum class Type { - /// Boolean as 1 bit, LSB bit-packed ordering - BOOL = 0, + /// Boolean as 1 bit, LSB bit-packed ordering + BOOL = 0, - /// Signed 32-bit little-endian integer - INT32, + /// Signed 32-bit little-endian integer + INT32, - /// Signed 64-bit little-endian integer - INT64, + /// Signed 64-bit little-endian integer + INT64, - /// 4-byte floating point value - FLOAT, + /// 4-byte floating point value + FLOAT, - /// 8-byte floating point value - DOUBLE, + /// 8-byte floating point value + DOUBLE, - /// UTF8 variable-length string as List - STRING, + /// UTF8 variable-length string as List + STRING, - /// User-defined data type - USER_DEFINED, + /// User-defined data type + USER_DEFINED, - // Leave this at the end - MAX_ID, + // Leave this at the end + MAX_ID, }; /// \brief The DataType struct to provide enum type for data type and functions @@ -62,16 +63,22 @@ class DataType { public: DataType() : id_(Type::BOOL) {} - explicit DataType(Type id, const std::string& user_defined_type_name = "") : id_(id), user_defined_type_name_(user_defined_type_name) {} + explicit DataType(Type id, const std::string& user_defined_type_name = "") + : id_(id), user_defined_type_name_(user_defined_type_name) {} - DataType(const DataType& other) : id_(other.id_), user_defined_type_name_(other.user_defined_type_name_) {} + 
DataType(const DataType& other) + : id_(other.id_), + user_defined_type_name_(other.user_defined_type_name_) {} - explicit DataType(DataType&& other) : id_(other.id_), user_defined_type_name_(std::move(other.user_defined_type_name_)) {} + explicit DataType(DataType&& other) + : id_(other.id_), + user_defined_type_name_(std::move(other.user_defined_type_name_)) {} inline DataType& operator=(const DataType& other) = default; bool Equals(const DataType& other) const { - return id_ == other.id_ && user_defined_type_name_ == other.user_defined_type_name_; + return id_ == other.id_ && + user_defined_type_name_ == other.user_defined_type_name_; } bool operator==(const DataType& other) const { return Equals(other); } diff --git a/include/gar/utils/version_parser.h b/include/gar/utils/version_parser.h index f758bbd1e..7636b80de 100644 --- a/include/gar/utils/version_parser.h +++ b/include/gar/utils/version_parser.h @@ -17,9 +17,9 @@ limitations under the License. #define GAR_UTILS_VERSION_PARSER_H_ #include +#include // NOLINT #include #include -#include #include "gar/utils/result.h" @@ -56,7 +56,8 @@ class InfoVersion { return true; } // check if type_str is in user defined types - if (std::find(user_define_types_.begin(), user_define_types_.end(), type_str) != user_define_types_.end()) { + if (std::find(user_define_types_.begin(), user_define_types_.end(), + type_str) != user_define_types_.end()) { return true; } return false; diff --git a/src/data_type.cc b/src/data_type.cc index c96677f79..7a4422d1e 100644 --- a/src/data_type.cc +++ b/src/data_type.cc @@ -64,11 +64,11 @@ DataType DataType::ArrowDataTypeToDataType( std::string DataType::ToTypeName() const { switch (id_) { -#define TO_STRING_CASE(_id) \ - case Type::_id: { \ - std::string name(GAR_STRINGIFY(_id)); \ +#define TO_STRING_CASE(_id) \ + case Type::_id: { \ + std::string name(GAR_STRINGIFY(_id)); \ std::transform(name.begin(), name.end(), name.begin(), ::tolower); \ - return name; \ + return name; \ } 
TO_STRING_CASE(BOOL) @@ -79,10 +79,10 @@ std::string DataType::ToTypeName() const { TO_STRING_CASE(STRING) #undef TO_STRING_CASE - case Type::USER_DEFINED: - return user_defined_type_name_; - default: - return "unknown"; + case Type::USER_DEFINED: + return user_defined_type_name_; + default: + return "unknown"; } } diff --git a/src/edges_builder.cc b/src/edges_builder.cc index 750de9b48..2a9b46742 100644 --- a/src/edges_builder.cc +++ b/src/edges_builder.cc @@ -106,11 +106,12 @@ Result> EdgesBuilder::convertToTable( for (auto& property_group : property_groups) { for (auto& property : property_group.GetProperties()) { // add a column to schema - schema_vector.push_back( - arrow::field(property.name, DataType::DataTypeToArrowDataType(property.type))); + schema_vector.push_back(arrow::field( + property.name, DataType::DataTypeToArrowDataType(property.type))); // add a column to data std::shared_ptr array; - GAR_RETURN_NOT_OK(appendToArray(property.type, property.name, array, edges)); + GAR_RETURN_NOT_OK( + appendToArray(property.type, property.name, array, edges)); arrays.push_back(array); } } diff --git a/src/graph.cc b/src/graph.cc index 1706a71e9..9389f99a6 100644 --- a/src/graph.cc +++ b/src/graph.cc @@ -29,9 +29,8 @@ Status CastToAny(std::shared_ptr array, template <> Status CastToAny(std::shared_ptr array, - std::any& any) { // NOLINT - using ArrayType = - typename ConvertToArrowType::ArrayType; + std::any& any) { // NOLINT + using ArrayType = typename ConvertToArrowType::ArrayType; auto column = std::dynamic_pointer_cast(array); any = column->GetString(0); return Status::OK(); diff --git a/src/graph_info.cc b/src/graph_info.cc index a0ae644fa..833b8372f 100644 --- a/src/graph_info.cc +++ b/src/graph_info.cc @@ -36,8 +36,9 @@ Result VertexInfo::Load(std::shared_ptr yaml) { } InfoVersion version; if (yaml->operator[]("version")) { - GAR_ASSIGN_OR_RAISE(version, - InfoVersion::Parse(yaml->operator[]("version").as())); + GAR_ASSIGN_OR_RAISE( + version, + 
InfoVersion::Parse(yaml->operator[]("version").as())); } VertexInfo vertex_info(label, chunk_size, version, prefix); auto property_groups = yaml->operator[]("property_groups"); @@ -122,8 +123,9 @@ Result EdgeInfo::Load(std::shared_ptr yaml) { } InfoVersion version; if (yaml->operator[]("version")) { - GAR_ASSIGN_OR_RAISE(version, - InfoVersion::Parse(yaml->operator[]("version").as())); + GAR_ASSIGN_OR_RAISE( + version, + InfoVersion::Parse(yaml->operator[]("version").as())); } EdgeInfo edge_info(src_label, edge_label, dst_label, chunk_size, @@ -256,8 +258,9 @@ Result GraphInfo::Load(const std::string& input, } InfoVersion version; if (graph_meta->operator[]("version")) { - GAR_ASSIGN_OR_RAISE(version, - InfoVersion::Parse(graph_meta->operator[]("version").as())); + GAR_ASSIGN_OR_RAISE( + version, InfoVersion::Parse( + graph_meta->operator[]("version").as())); } GraphInfo graph_info(name, version, prefix); diff --git a/src/version_paser.cc b/src/version_parser.cc similarity index 89% rename from src/version_paser.cc rename to src/version_parser.cc index 3d7d2b40f..9e5dbdff6 100644 --- a/src/version_paser.cc +++ b/src/version_parser.cc @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include -#include +#include // NOLINT #include #include "gar/utils/version_parser.h" @@ -26,7 +26,7 @@ bool is_whitespace(char ch) { return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; } -void trim(std::string& s) { +void trim(std::string& s) { // NOLINT size_t trim_left = 0; for (auto it = s.begin(); it != s.end(); ++it) { if (!is_whitespace(*it)) { @@ -50,7 +50,8 @@ void trim(std::string& s) { if (trim_left == 0) { s.resize(s.size() - trim_right); } else { - std::string copy(s.c_str() + trim_left, s.size() - trim_left - trim_right); + std::string copy(s.c_str() + trim_left, + s.size() - trim_left - trim_right); s.swap(copy); } } @@ -70,7 +71,8 @@ int parserVersionImpl(const std::string& version_str) { } } -std::vector parseUserDefineTypesImpl(const std::string& version_str) { +std::vector parseUserDefineTypesImpl( + const std::string& version_str) { std::smatch match; std::vector user_define_types; const std::regex user_define_types_regex("gar/v\\d+ *\\((.*)\\).*"); @@ -97,7 +99,8 @@ std::vector parseUserDefineTypesImpl(const std::string& version_str return user_define_types; } -Result InfoVersion::Parse(const std::string& version_str) noexcept { +Result InfoVersion::Parse( + const std::string& version_str) noexcept { InfoVersion version; try { version.version_ = parserVersionImpl(version_str); diff --git a/src/vertices_builder.cc b/src/vertices_builder.cc index fdf00a480..2746c26e9 100644 --- a/src/vertices_builder.cc +++ b/src/vertices_builder.cc @@ -71,8 +71,8 @@ Result> VerticesBuilder::convertToTable() { for (auto& property_group : property_groups) { for (auto& property : property_group.GetProperties()) { // add a column to schema - schema_vector.push_back( - arrow::field(property.name, DataType::DataTypeToArrowDataType(property.type))); + schema_vector.push_back(arrow::field( + property.name, DataType::DataTypeToArrowDataType(property.type))); // add a column to data std::shared_ptr array; appendToArray(property.type, property.name, array); diff 
--git a/test/test_example/test_bfs_father_example.cc b/test/test_example/test_bfs_father_example.cc index 36e3f0e9a..450440bc0 100644 --- a/test/test_example/test_bfs_father_example.cc +++ b/test/test_example/test_bfs_father_example.cc @@ -83,9 +83,10 @@ TEST_CASE("test_bfs_with_father_example") { // Append the bfs result to the vertex info as a property group // and write to file // construct property group - GAR_NAMESPACE::Property bfs = {"bfs", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; - GAR_NAMESPACE::Property father = {"father", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), - false}; + GAR_NAMESPACE::Property bfs = { + "bfs", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; + GAR_NAMESPACE::Property father = { + "father", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), false}; std::vector property_vector = {bfs, father}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::CSV); diff --git a/test/test_example/test_bfs_pull_example.cc b/test/test_example/test_bfs_pull_example.cc index bcd800270..511860600 100644 --- a/test/test_example/test_bfs_pull_example.cc +++ b/test/test_example/test_bfs_pull_example.cc @@ -85,8 +85,8 @@ TEST_CASE("test_bfs_using_pull_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property bfs = {"bfs-pull", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), - false}; + GAR_NAMESPACE::Property bfs = { + "bfs-pull", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; std::vector property_vector = {bfs}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_example/test_bfs_push_example.cc b/test/test_example/test_bfs_push_example.cc index cc78e301a..7159efea0 100644 --- a/test/test_example/test_bfs_push_example.cc +++ b/test/test_example/test_bfs_push_example.cc @@ -84,8 +84,8 @@ TEST_CASE("test_bfs_using_push_example") { // 
extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property bfs = {"bfs-push", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), - false}; + GAR_NAMESPACE::Property bfs = { + "bfs-push", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; std::vector property_vector = {bfs}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_example/test_bfs_stream_example.cc b/test/test_example/test_bfs_stream_example.cc index 63726a013..8db4000ca 100644 --- a/test/test_example/test_bfs_stream_example.cc +++ b/test/test_example/test_bfs_stream_example.cc @@ -75,8 +75,8 @@ TEST_CASE("test_bfs_using_stream_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property bfs = {"bfs-stream", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), - false}; + GAR_NAMESPACE::Property bfs = { + "bfs-stream", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; std::vector property_vector = {bfs}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_example/test_bgl_example.cc b/test/test_example/test_bgl_example.cc index 2b950eca3..5c2f09cbd 100644 --- a/test/test_example/test_bgl_example.cc +++ b/test/test_example/test_bgl_example.cc @@ -106,7 +106,8 @@ TEST_CASE("test_bgl_cc_example") { // method 1 for writing results: construct new vertex type and write results // using vertex builder construct new property group - GAR_NAMESPACE::Property cc = {"cc", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; + GAR_NAMESPACE::Property cc = { + "cc", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), false}; std::vector property_vector = {cc}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); @@ -114,7 +115,8 @@ TEST_CASE("test_bgl_cc_example") { std::string vertex_label = 
"cc_result", vertex_prefix = "result/"; int chunk_size = 100; GAR_NAMESPACE::InfoVersion version(1); - GAR_NAMESPACE::VertexInfo new_info(vertex_label, chunk_size, version, vertex_prefix); + GAR_NAMESPACE::VertexInfo new_info(vertex_label, chunk_size, version, + vertex_prefix); REQUIRE(new_info.AddPropertyGroup(group).ok()); // dump new vertex info REQUIRE(new_info.IsValidated()); diff --git a/test/test_example/test_cc_push_example.cc b/test/test_example/test_cc_push_example.cc index a4ca0663c..6eadc0a17 100644 --- a/test/test_example/test_cc_push_example.cc +++ b/test/test_example/test_cc_push_example.cc @@ -119,8 +119,8 @@ TEST_CASE("test_cc_using_push_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property cc = {"cc-push", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), - false}; + GAR_NAMESPACE::Property cc = { + "cc-push", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), false}; std::vector property_vector = {cc}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_example/test_cc_stream_example.cc b/test/test_example/test_cc_stream_example.cc index 7b2eb09cd..0f33c1288 100644 --- a/test/test_example/test_cc_stream_example.cc +++ b/test/test_example/test_cc_stream_example.cc @@ -86,7 +86,8 @@ TEST_CASE("test_cc_using_stream_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property cc = {"cc", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), false}; + GAR_NAMESPACE::Property cc = { + "cc", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT64), false}; std::vector property_vector = {cc}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_example/test_construct_info_example.cc b/test/test_example/test_construct_info_example.cc index 807b06f7b..94ced9252 100644 --- 
a/test/test_example/test_construct_info_example.cc +++ b/test/test_example/test_construct_info_example.cc @@ -42,13 +42,14 @@ TEST_CASE("test_construct_info_example") { REQUIRE(vertex_info.GetPropertyGroups().size() == 0); // construct properties and property groups - GAR_NAMESPACE::Property id = {"id", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), true}; - GAR_NAMESPACE::Property firstName = {"firstName", - GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; - GAR_NAMESPACE::Property lastName = {"lastName", - GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; - GAR_NAMESPACE::Property gender = {"gender", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), - false}; + GAR_NAMESPACE::Property id = { + "id", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::INT32), true}; + GAR_NAMESPACE::Property firstName = { + "firstName", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; + GAR_NAMESPACE::Property lastName = { + "lastName", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; + GAR_NAMESPACE::Property gender = { + "gender", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; std::vector property_vector_1 = {id}, property_vector_2 = {firstName, lastName, gender}; @@ -100,9 +101,9 @@ TEST_CASE("test_construct_info_example") { edge_prefix = "edge/person_knows_person/"; int edge_chunk_size = 1024, src_chunk_size = 100, dst_chunk_size = 100; bool directed = false; - GAR_NAMESPACE::EdgeInfo edge_info(src_label, edge_label, dst_label, - edge_chunk_size, src_chunk_size, - dst_chunk_size, directed, version, edge_prefix); + GAR_NAMESPACE::EdgeInfo edge_info( + src_label, edge_label, dst_label, edge_chunk_size, src_chunk_size, + dst_chunk_size, directed, version, edge_prefix); REQUIRE(edge_info.GetSrcLabel() == src_label); REQUIRE(edge_info.GetEdgeLabel() == edge_label); REQUIRE(edge_info.GetDstLabel() == dst_label); @@ -140,7 +141,8 @@ TEST_CASE("test_construct_info_example") { // add property group & validate 
GAR_NAMESPACE::Property creationDate = { - "creationDate", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), false}; + "creationDate", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::STRING), + false}; std::vector property_vector_3 = {creationDate}; GAR_NAMESPACE::PropertyGroup group3(property_vector_3, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_example/test_pagerank_example.cc b/test/test_example/test_pagerank_example.cc index 25ad776a8..9b9935fc0 100644 --- a/test/test_example/test_pagerank_example.cc +++ b/test/test_example/test_pagerank_example.cc @@ -85,8 +85,8 @@ TEST_CASE("test_pagerank_example") { // extend the original vertex info and write results to gar using writer // construct property group - GAR_NAMESPACE::Property pagerank = {"pagerank", - GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::DOUBLE), false}; + GAR_NAMESPACE::Property pagerank = { + "pagerank", GAR_NAMESPACE::DataType(GAR_NAMESPACE::Type::DOUBLE), false}; std::vector property_vector = {pagerank}; GAR_NAMESPACE::PropertyGroup group(property_vector, GAR_NAMESPACE::FileType::PARQUET); diff --git a/test/test_info.cc b/test/test_info.cc index 6037bb6b7..8880f85f8 100644 --- a/test/test_info.cc +++ b/test/test_info.cc @@ -305,13 +305,13 @@ TEST_CASE("test_graph_info_load_from_file") { } TEST_CASE("test_info_version") { - std::string version_str1 = "gar/v1"; - std::cout << version_str1 << std::endl; - GAR_NAMESPACE::InfoVersion::Parse(version_str1); - std::string version_str2 = "gar/v2"; - std::cout << version_str2 << std::endl; - GAR_NAMESPACE::InfoVersion::Parse(version_str2); - std::string version_str3 = "gar/v3 (udd1, udd2)"; - std::cout << version_str3 << std::endl; - GAR_NAMESPACE::InfoVersion::Parse(version_str3); + std::string version_str1 = "gar/v1"; + std::cout << version_str1 << std::endl; + GAR_NAMESPACE::InfoVersion::Parse(version_str1); + std::string version_str2 = "gar/v2"; + std::cout << version_str2 << std::endl; + GAR_NAMESPACE::InfoVersion::Parse(version_str2); + 
std::string version_str3 = "gar/v3 (udd1, udd2)"; + std::cout << version_str3 << std::endl; + GAR_NAMESPACE::InfoVersion::Parse(version_str3); } From 0181601fdfd297aed7a7e004dfb3a742336ed610 Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 10:10:02 +0800 Subject: [PATCH 03/11] Add docs about InfoVersion --- docs/api-reference.rst | 6 ++++++ include/gar/graph_info.h | 3 +++ include/gar/utils/version_parser.h | 9 +++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/api-reference.rst b/docs/api-reference.rst index 60f7c0b2d..dc0a82f90 100644 --- a/docs/api-reference.rst +++ b/docs/api-reference.rst @@ -210,3 +210,9 @@ Yaml Parser .. doxygenclass:: GraphArchive::Yaml :members: :undoc-members: + +Info Version + +.. doxygenclass:: GraphArchive::InfoVersion + :members: + :undoc-members: diff --git a/include/gar/graph_info.h b/include/gar/graph_info.h index a753cd101..ef07144f5 100644 --- a/include/gar/graph_info.h +++ b/include/gar/graph_info.h @@ -120,6 +120,7 @@ class VertexInfo { * * @param label The label of the vertex. * @param chunk_size number of vertex in each vertex chunk. + * @param version version of the vertex info. * @param prefix prefix of the vertex info. */ explicit VertexInfo(const std::string& label, IdType chunk_size, @@ -320,6 +321,7 @@ class EdgeInfo { * @param src_chunk_size number of source vertices in each vertex chunk * @param dst_chunk_size number of destination vertices in each vertex chunk * @param directed whether the edge is directed + * @param version version of the edge info * @param prefix prefix of the edge info */ explicit EdgeInfo(const std::string& src_label, const std::string& edge_label, @@ -719,6 +721,7 @@ class GraphInfo { * the prefix of graph would be ./ by default. * * @param[in] graph_name name of graph + * @param[in] version version of graph info * @param[in] prefix absolute path prefix to store chunk files of graph. 
*/ explicit GraphInfo(const std::string& graph_name, const InfoVersion& version, diff --git a/include/gar/utils/version_parser.h b/include/gar/utils/version_parser.h index 7636b80de..2999206d0 100644 --- a/include/gar/utils/version_parser.h +++ b/include/gar/utils/version_parser.h @@ -28,15 +28,19 @@ namespace GAR_NAMESPACE_INTERNAL { /// \brief InfoVersion is a class provide version information of info. class InfoVersion { public: - // inline static const std::regex version_regex{"gar/v\\d+\\"}; - // inline static const std::regex user_define_types_regex{"\\(.*\\)"}; + /// \brief Parse version string to InfoVersion. static Result Parse(const std::string& str) noexcept; + /// Default constructor InfoVersion() : version_(version2types.rbegin()->first) {} + /// Constructor with version explicit InfoVersion(int version) : version_(version) {} + /// Copy constructor InfoVersion(const InfoVersion& other) = default; + /// Copy assignment inline InfoVersion& operator=(const InfoVersion& other) = default; + /// Dump version to string std::string ToString() const { std::string str = "gar/" + std::to_string(version_); if (!user_define_types_.empty()) { @@ -49,6 +53,7 @@ class InfoVersion { return str; } + /// Check if type is supported by version inline bool CheckType(const std::string& type_str) noexcept { auto& types = version2types.at(version_); // check if type_str is in supported types of version From 13c41ad16fdd5fc3c0a1b7483e55fba161423cf6 Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 15:56:39 +0800 Subject: [PATCH 04/11] Update --- include/gar/utils/version_parser.h | 17 +++++++++++-- test/test_info.cc | 41 +++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/include/gar/utils/version_parser.h b/include/gar/utils/version_parser.h index 2999206d0..24924b691 100644 --- a/include/gar/utils/version_parser.h +++ b/include/gar/utils/version_parser.h @@ -34,15 +34,28 @@ class InfoVersion { /// Default constructor 
InfoVersion() : version_(version2types.rbegin()->first) {} /// Constructor with version - explicit InfoVersion(int version) : version_(version) {} + explicit InfoVersion(int version) : version_(version) { + if (version2types.find(version) == version2types.end()) { + throw std::invalid_argument("Unsupported version: " + + std::to_string(version)); + } + } /// Copy constructor InfoVersion(const InfoVersion& other) = default; /// Copy assignment inline InfoVersion& operator=(const InfoVersion& other) = default; + /// Get version + int version() const { return version_; } + + /// Get user defined types + const std::vector& user_define_types() const { + return user_define_types_; + } + /// Dump version to string std::string ToString() const { - std::string str = "gar/" + std::to_string(version_); + std::string str = "gar/v" + std::to_string(version_); if (!user_define_types_.empty()) { str += " ("; for (auto& type : user_define_types_) { diff --git a/test/test_info.cc b/test/test_info.cc index 8880f85f8..ec5094932 100644 --- a/test/test_info.cc +++ b/test/test_info.cc @@ -291,6 +291,35 @@ TEST_CASE("test_edge_info") { // TODO(@acezen): test is validated } +TEST_CASE("test_info_version") { + GAR_NAMESPACE::InfoVersion info_version(1); + REQUIRE(info_version.GetVersion() == 1); + REQUIRE(info_version.user_define_types() == std::vector({})); + REQUIRE(info_version.ToString() == "gar/v1"); + REQUIRE(info_version.CheckType("int32") == true); + REQUIRE(info_version.CheckType("date32") == false); + + GAR_NAMESPACE::InfoVersion info_version_2(1, {"t1", "t2"}); + REQUIRE(info_version_2.GetVersion() == 1); + REQUIRE(info_version_2.user_define_types() == + std::vector({"t1", "t2"})); + REQUIRE(info_version_2.ToString() == "gar/v1 (t1, t2)"); + REQUIRE(info_version.CheckType("t1") == true); + + // raise error if version is not 1 + CHECK_THROWS_AS(GAR_NAMESPACE::InfoVersion(2), std::invalid_argument); + + std::string version_str = "gar/v1 (t1, t2)"; + auto info_version_result = 
GAR_NAMESPACE::InfoVersion::Parse(version_str); + REQUIRE(!info_version_result.has_error()); + auto& info_version_3 = info_version_result.value(); + REQUIRE(info_version_3.GetVersion() == 1); + REQUIRE(info_version_3.user_define_types() == + std::vector({"t1", "t2"})); + REQUIRE(info_version_3.ToString() == "gar/v1 (t1, t2)"); + REQUIRE(info_version.CheckType("t1") == true); +} + TEST_CASE("test_graph_info_load_from_file") { std::string path = TEST_DATA_DIR + "/ldbc_sample/csv/ldbc_sample.graph.yml"; auto graph_info_result = GAR_NAMESPACE::GraphInfo::Load(path); @@ -303,15 +332,3 @@ TEST_CASE("test_graph_info_load_from_file") { REQUIRE(vertex_infos.size() == 1); REQUIRE(edge_infos.size() == 1); } - -TEST_CASE("test_info_version") { - std::string version_str1 = "gar/v1"; - std::cout << version_str1 << std::endl; - GAR_NAMESPACE::InfoVersion::Parse(version_str1); - std::string version_str2 = "gar/v2"; - std::cout << version_str2 << std::endl; - GAR_NAMESPACE::InfoVersion::Parse(version_str2); - std::string version_str3 = "gar/v3 (udd1, udd2)"; - std::cout << version_str3 << std::endl; - GAR_NAMESPACE::InfoVersion::Parse(version_str3); -} From 86cafe048645fd4c19a3cef508cb7814129eb2f9 Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 15:57:55 +0800 Subject: [PATCH 05/11] Minor fix --- docs/api-reference.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/api-reference.rst b/docs/api-reference.rst index dc0a82f90..d79e64097 100644 --- a/docs/api-reference.rst +++ b/docs/api-reference.rst @@ -212,6 +212,7 @@ Yaml Parser :undoc-members: Info Version +~~~~~~~~~~~~~~~~~~~ .. 
doxygenclass:: GraphArchive::InfoVersion :members: From 37dde8d81e865e3da865480cff73d9e94d9f95ff Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 16:06:12 +0800 Subject: [PATCH 06/11] Minor update --- include/gar/utils/utils.h | 1 - test/gar-test | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/gar/utils/utils.h b/include/gar/utils/utils.h index f22f27119..aa26fe412 100644 --- a/include/gar/utils/utils.h +++ b/include/gar/utils/utils.h @@ -25,7 +25,6 @@ limitations under the License. #include "gar/utils/result.h" #define REGULAR_SEPERATOR "_" -#define GAR_VERSION 1 namespace arrow { class Array; diff --git a/test/gar-test b/test/gar-test index 03001b0c1..19f1e57b9 160000 --- a/test/gar-test +++ b/test/gar-test @@ -1 +1 @@ -Subproject commit 03001b0c1fd47854daa7d68793ec3083f15b859d +Subproject commit 19f1e57b9c137ad5447667c4bf71bbc2a1e4d371 From 6febabe33cf38de8858899b733dd4c779a915ac1 Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 20:35:07 +0800 Subject: [PATCH 07/11] Minor fix --- test/test_info.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_info.cc b/test/test_info.cc index ec5094932..4c82080a7 100644 --- a/test/test_info.cc +++ b/test/test_info.cc @@ -293,14 +293,14 @@ TEST_CASE("test_edge_info") { TEST_CASE("test_info_version") { GAR_NAMESPACE::InfoVersion info_version(1); - REQUIRE(info_version.GetVersion() == 1); + REQUIRE(info_version.version() == 1); REQUIRE(info_version.user_define_types() == std::vector({})); REQUIRE(info_version.ToString() == "gar/v1"); REQUIRE(info_version.CheckType("int32") == true); REQUIRE(info_version.CheckType("date32") == false); GAR_NAMESPACE::InfoVersion info_version_2(1, {"t1", "t2"}); - REQUIRE(info_version_2.GetVersion() == 1); + REQUIRE(info_version_2.version() == 1); REQUIRE(info_version_2.user_define_types() == std::vector({"t1", "t2"})); REQUIRE(info_version_2.ToString() == "gar/v1 (t1, t2)"); @@ -313,7 +313,7 @@ 
TEST_CASE("test_info_version") { auto info_version_result = GAR_NAMESPACE::InfoVersion::Parse(version_str); REQUIRE(!info_version_result.has_error()); auto& info_version_3 = info_version_result.value(); - REQUIRE(info_version_3.GetVersion() == 1); + REQUIRE(info_version_3.version() == 1); REQUIRE(info_version_3.user_define_types() == std::vector({"t1", "t2"})); REQUIRE(info_version_3.ToString() == "gar/v1 (t1, t2)"); From fe7b813964cee827a9689ef52410966471099a06 Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 20:44:52 +0800 Subject: [PATCH 08/11] Update --- include/gar/utils/version_parser.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/gar/utils/version_parser.h b/include/gar/utils/version_parser.h index 24924b691..dd51094e2 100644 --- a/include/gar/utils/version_parser.h +++ b/include/gar/utils/version_parser.h @@ -40,6 +40,14 @@ class InfoVersion { std::to_string(version)); } } + /// Constructor with version and user defined types + explicit InfoVersion(int version, const std::vector& user_define_types) + : version_(version), user_define_types_(user_define_types) { + if (version2types.find(version) == version2types.end()) { + throw std::invalid_argument("Unsupported version: " + + std::to_string(version)); + } + } /// Copy constructor InfoVersion(const InfoVersion& other) = default; /// Copy assignment From bdc9a7a702182d69c263da4ff1b3ce15af4eb29b Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 20:52:22 +0800 Subject: [PATCH 09/11] Minor fix --- test/test_info.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_info.cc b/test/test_info.cc index 4c82080a7..a73d232a4 100644 --- a/test/test_info.cc +++ b/test/test_info.cc @@ -303,21 +303,21 @@ TEST_CASE("test_info_version") { REQUIRE(info_version_2.version() == 1); REQUIRE(info_version_2.user_define_types() == std::vector({"t1", "t2"})); - REQUIRE(info_version_2.ToString() == "gar/v1 (t1, t2)"); - 
REQUIRE(info_version.CheckType("t1") == true); + REQUIRE(info_version_2.ToString() == "gar/v1 (t1,t2)"); + REQUIRE(info_version_2.CheckType("t1") == true); // raise error if version is not 1 CHECK_THROWS_AS(GAR_NAMESPACE::InfoVersion(2), std::invalid_argument); - std::string version_str = "gar/v1 (t1, t2)"; + std::string version_str = "gar/v1 (t1,t2)"; auto info_version_result = GAR_NAMESPACE::InfoVersion::Parse(version_str); REQUIRE(!info_version_result.has_error()); auto& info_version_3 = info_version_result.value(); REQUIRE(info_version_3.version() == 1); REQUIRE(info_version_3.user_define_types() == std::vector({"t1", "t2"})); - REQUIRE(info_version_3.ToString() == "gar/v1 (t1, t2)"); - REQUIRE(info_version.CheckType("t1") == true); + REQUIRE(info_version_3.ToString() == version_str); + REQUIRE(info_version_3.CheckType("t1") == true); } TEST_CASE("test_graph_info_load_from_file") { From 5b807f6ecee03a874ec6c82a6cfdd184318b2a0c Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 12 Dec 2022 21:12:58 +0800 Subject: [PATCH 10/11] Fix --- include/gar/utils/version_parser.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/gar/utils/version_parser.h b/include/gar/utils/version_parser.h index dd51094e2..078ebdb8a 100644 --- a/include/gar/utils/version_parser.h +++ b/include/gar/utils/version_parser.h @@ -41,7 +41,8 @@ class InfoVersion { } } /// Constructor with version and user defined types - explicit InfoVersion(int version, const std::vector& user_define_types) + explicit InfoVersion(int version, + const std::vector& user_define_types) : version_(version), user_define_types_(user_define_types) { if (version2types.find(version) == version2types.end()) { throw std::invalid_argument("Unsupported version: " + From b9e1af6fcfe8bd3b466d8b3d6352c39aa21e5f4a Mon Sep 17 00:00:00 2001 From: acezen Date: Tue, 13 Dec 2022 14:05:07 +0800 Subject: [PATCH 11/11] Add GetVersion to Info --- include/gar/graph_info.h | 9 +++++++++ 
include/gar/utils/version_parser.h | 6 ++++++ test/test_info.cc | 5 +++++ 3 files changed, 20 insertions(+) diff --git a/include/gar/graph_info.h b/include/gar/graph_info.h index ef07144f5..d3812e243 100644 --- a/include/gar/graph_info.h +++ b/include/gar/graph_info.h @@ -183,6 +183,9 @@ class VertexInfo { /// Get the path prefix of the vertex. inline std::string GetPrefix() const { return prefix_; } + /// Get the version info of the vertex. + inline const InfoVersion& GetVersion() const { return version_; } + /// Get the property groups of the vertex. inline const std::vector& GetPropertyGroups() const { return property_groups_; @@ -440,6 +443,9 @@ class EdgeInfo { /// Check if edge is directed. inline bool IsDirected() const noexcept { return directed_; } + /// Get the version info of the edge. + inline const InfoVersion& GetVersion() const { return version_; } + /// Get path prefix of adj list type. inline Result GetAdjListPrefix(AdjListType adj_list_type) const { if (!ContainAdjList(adj_list_type)) { @@ -790,6 +796,9 @@ class GraphInfo { /// Get the absolute path prefix of chunk files. inline std::string GetPrefix() const noexcept { return prefix_; } + /// Get the version info of the graph.
+ inline const InfoVersion& GetVersion() const { return version_; } + /// Get the vertex info by vertex label inline Result GetVertexInfo(const std::string& label) const noexcept { diff --git a/include/gar/utils/version_parser.h b/include/gar/utils/version_parser.h index 078ebdb8a..6e26616db 100644 --- a/include/gar/utils/version_parser.h +++ b/include/gar/utils/version_parser.h @@ -54,6 +54,12 @@ class InfoVersion { /// Copy assignment inline InfoVersion& operator=(const InfoVersion& other) = default; + /// Check if two InfoVersion are equal + bool operator==(const InfoVersion& other) const { + return version_ == other.version_ && + user_define_types_ == other.user_define_types_; + } + /// Get version int version() const { return version_; } diff --git a/test/test_info.cc b/test/test_info.cc index a73d232a4..f47e6a064 100644 --- a/test/test_info.cc +++ b/test/test_info.cc @@ -33,6 +33,7 @@ TEST_CASE("test_graph_info") { GAR_NAMESPACE::GraphInfo graph_info(graph_name, version, prefix); REQUIRE(graph_info.GetName() == graph_name); REQUIRE(graph_info.GetPrefix() == prefix); + REQUIRE(graph_info.GetVersion() == version); // test add vertex and get vertex info REQUIRE(graph_info.GetAllVertexInfo().size() == 0); @@ -68,6 +69,8 @@ TEST_CASE("test_graph_info") { // edge info already exists REQUIRE(graph_info.AddEdge(edge_info).IsInvalidOperation()); + REQUIRE(graph_info.GetVersion() == version); + // TODO(@acezen): test dump std::string save_path(std::tmpnam(nullptr)); @@ -87,6 +90,7 @@ TEST_CASE("test_vertex_info") { REQUIRE(v_info.GetLabel() == label); REQUIRE(v_info.GetChunkSize() == chunk_size); REQUIRE(v_info.GetPrefix() == label + "/"); // default prefix is label + "/" + REQUIRE(v_info.GetVersion() == version); // test add property group GAR_NAMESPACE::Property p; @@ -161,6 +165,7 @@ TEST_CASE("test_edge_info") { REQUIRE(edge_info.IsDirected() == directed); REQUIRE(edge_info.GetPrefix() == src_label + "_" + edge_label + "_" + dst_label + "/"); + 
REQUIRE(edge_info.GetVersion() == version); auto adj_list_type = GAR_NAMESPACE::AdjListType::ordered_by_source; auto adj_list_type_not_exist = GAR_NAMESPACE::AdjListType::ordered_by_dest;