From 8549082d1de2d809fe5a378e57dde27987a51650 Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 11 Mar 2024 14:45:41 +0800 Subject: [PATCH 1/3] [Feat][C++] Support data type Signed-off-by: acezen --- cpp/include/gar/fwd.h | 2 + cpp/include/gar/util/convert_to_arrow_type.h | 3 + cpp/include/gar/util/data_type.h | 16 +++++ cpp/src/data_type.cc | 7 ++ cpp/src/edges_builder.cc | 70 ++++++++++++++------ cpp/src/graph.cc | 32 +++++++++ cpp/src/vertices_builder.cc | 67 +++++++++++++------ 7 files changed, 158 insertions(+), 39 deletions(-) diff --git a/cpp/include/gar/fwd.h b/cpp/include/gar/fwd.h index 0902539ed..d0c9f444d 100644 --- a/cpp/include/gar/fwd.h +++ b/cpp/include/gar/fwd.h @@ -189,6 +189,8 @@ const std::shared_ptr& float32(); const std::shared_ptr& float64(); /// @brief Return a string DataType instance const std::shared_ptr& string(); +/// @brief Return a timestamp DataType instance +const std::shared_ptr& timestamp(); /** * @brief Return a list DataType instance * diff --git a/cpp/include/gar/util/convert_to_arrow_type.h b/cpp/include/gar/util/convert_to_arrow_type.h index ff3b1dd40..33de5ab59 100644 --- a/cpp/include/gar/util/convert_to_arrow_type.h +++ b/cpp/include/gar/util/convert_to_arrow_type.h @@ -69,6 +69,9 @@ CONVERT_TO_ARROW_TYPE(Type::DOUBLE, double, arrow::DoubleType, CONVERT_TO_ARROW_TYPE(Type::STRING, std::string, arrow::LargeStringType, arrow::LargeStringArray, arrow::LargeStringBuilder, arrow::large_utf8(), "string") +CONVERT_TO_ARROW_TYPE(Type::TIMESTAMP, Timestamp, arrow::TimestampType, + arrow::TimestampArray, arrow::TimestampBuilder, + arrow::timestamp(arrow::TimeUnit::MILLI), "timestamp") } // namespace GAR_NAMESPACE_INTERNAL diff --git a/cpp/include/gar/util/data_type.h b/cpp/include/gar/util/data_type.h index d7bcf6085..36e05c548 100644 --- a/cpp/include/gar/util/data_type.h +++ b/cpp/include/gar/util/data_type.h @@ -54,6 +54,9 @@ enum class Type { /** List of some logical data type */ LIST, + /** Exact timestamp encoded with int64 since UNIX epoch in milliseconds */ + TIMESTAMP, + /** User-defined data type */ USER_DEFINED, @@ -125,6 +128,19 @@ class DataType { std::shared_ptr child_; std::string user_defined_type_name_; }; // struct DataType + +// Define a Timestamp class to represent timestamp data type value +class Timestamp { + public: + using c_type = int64_t; + explicit Timestamp(c_type value) : value_(value) {} + + c_type value() const { return value_; } + + private: + c_type value_; +}; + } // namespace GAR_NAMESPACE_INTERNAL #endif // GAR_UTIL_DATA_TYPE_H_ diff --git a/cpp/src/data_type.cc b/cpp/src/data_type.cc index 69421ca92..b490da500 100644 --- a/cpp/src/data_type.cc +++ b/cpp/src/data_type.cc @@ -39,6 +39,8 @@ std::shared_ptr DataType::DataTypeToArrowDataType( return arrow::float64(); case Type::STRING: return arrow::large_utf8(); + case Type::TIMESTAMP: + return arrow::timestamp(arrow::TimeUnit::MILLI); case Type::LIST: return arrow::list(DataTypeToArrowDataType(type->child_)); default: @@ -65,6 +67,8 @@ std::shared_ptr DataType::ArrowDataTypeToDataType( return string(); case arrow::Type::LARGE_STRING: return string(); + case arrow::Type::TIMESTAMP: + return timestamp(); case arrow::Type::LIST: return list(ArrowDataTypeToDataType(type->field(0)->type())); default: @@ -113,6 +117,8 @@ std::shared_ptr DataType::TypeNameToDataType(const std::string& str) { return float64(); } else if (str == "string") { return string(); + } else if (str == "timestamp") { + return timestamp(); } else if (str == "list") { return list(int32()); } else if (str == "list") { @@ -141,6 +147,7 @@ TYPE_FACTORY(int64, Type::INT64) TYPE_FACTORY(float32, Type::FLOAT) TYPE_FACTORY(float64, Type::DOUBLE) TYPE_FACTORY(string, Type::STRING) +TYPE_FACTORY(timestamp, Type::TIMESTAMP) std::shared_ptr list(const std::shared_ptr& value_type) { return std::make_shared(Type::LIST, value_type); diff --git a/cpp/src/edges_builder.cc b/cpp/src/edges_builder.cc index 3b6361e54..9c3ec5354 100644 --- a/cpp/src/edges_builder.cc +++ b/cpp/src/edges_builder.cc @@ -152,6 +152,13 @@ Status EdgesBuilder::validate(const Edge& e, invalid_type = true; } break; + case Type::TIMESTAMP: + // timestamp is stored as int64_t + if (property.second.type() != + typeid(typename TypeToArrowType::CType::c_type)) { + invalid_type = true; + } + break; default: return Status::TypeError("Unsupported property type."); } @@ -165,6 +172,47 @@ Status EdgesBuilder::validate(const Edge& e, return Status::OK(); } +template +Status EdgesBuilder::tryToAppend( + const std::string& property_name, + std::shared_ptr& array, // NOLINT + const std::vector& edges) { + using CType = typename TypeToArrowType::CType; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + typename TypeToArrowType::BuilderType builder(pool); + for (const auto& e : edges) { + if (e.Empty() || (!e.ContainProperty(property_name))) { + RETURN_NOT_ARROW_OK(builder.AppendNull()); + } else { + RETURN_NOT_ARROW_OK( + builder.Append(std::any_cast(e.GetProperty(property_name)))); + } + } + array = builder.Finish().ValueOrDie(); + return Status::OK(); +} + +template <> +Status EdgesBuilder::tryToAppend( + const std::string& property_name, + std::shared_ptr& array, // NOLINT + const std::vector& edges) { + using CType = typename TypeToArrowType::CType::c_type; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + typename TypeToArrowType::BuilderType builder( + arrow::timestamp(arrow::TimeUnit::MILLI), pool); + for (const auto& e : edges) { + if (e.Empty() || (!e.ContainProperty(property_name))) { + RETURN_NOT_ARROW_OK(builder.AppendNull()); + } else { + RETURN_NOT_ARROW_OK( + builder.Append(std::any_cast(e.GetProperty(property_name)))); + } + } + array = builder.Finish().ValueOrDie(); + return Status::OK(); +} + Status EdgesBuilder::appendToArray( const std::shared_ptr& type, const std::string& property_name, std::shared_ptr& array, // NOLINT @@ -182,32 +230,14 @@ Status EdgesBuilder::appendToArray( return tryToAppend(property_name, array, edges); case Type::STRING: return tryToAppend(property_name, array, edges); + case Type::TIMESTAMP: + return tryToAppend(property_name, array, edges); default: return Status::TypeError("Unsupported property type."); } return Status::OK(); } -template -Status EdgesBuilder::tryToAppend( - const std::string& property_name, - std::shared_ptr& array, // NOLINT - const std::vector& edges) { - using CType = typename TypeToArrowType::CType; - arrow::MemoryPool* pool = arrow::default_memory_pool(); - typename TypeToArrowType::BuilderType builder(pool); - for (const auto& e : edges) { - if (e.Empty() || (!e.ContainProperty(property_name))) { - RETURN_NOT_ARROW_OK(builder.AppendNull()); - } else { - RETURN_NOT_ARROW_OK( - builder.Append(std::any_cast(e.GetProperty(property_name)))); - } - } - array = builder.Finish().ValueOrDie(); - return Status::OK(); -} - Status EdgesBuilder::tryToAppend( int src_or_dest, std::shared_ptr& array, // NOLINT diff --git a/cpp/src/graph.cc b/cpp/src/graph.cc index 3b9ea48a4..f7c267ee8 100644 --- a/cpp/src/graph.cc +++ b/cpp/src/graph.cc @@ -54,6 +54,8 @@ Status TryToCastToAny(const std::shared_ptr& type, return CastToAny(array, any); case Type::STRING: return CastToAny(array, any); + case Type::TIMESTAMP: + return CastToAny(array, any); default: return Status::TypeError("Unsupported type."); } @@ -111,6 +113,21 @@ Result Vertex::property(const std::string& property) const { } } +template <> +Result Vertex::property(const std::string& property) const { + if (properties_.find(property) == properties_.end()) { + return Status::KeyError("Property with name ", property, + " does not exist in the vertex."); + } + try { + Timestamp ret(std::any_cast(properties_.at(property))); + return ret; + } catch (const std::bad_any_cast& e) { + return Status::TypeError("Any cast failed, the property type of ", property, + " is not matched ", e.what()); + } +} + template <> Result Vertex::property(const std::string& property) const { auto it = list_properties_.find(property); @@ -182,6 +199,21 @@ Result Edge::property(const std::string& property) const { } } +template <> +Result Edge::property(const std::string& property) const { + if (properties_.find(property) == properties_.end()) { + return Status::KeyError("Property with name ", property, + " does not exist in the edge."); + } + try { + Timestamp ret(std::any_cast(properties_.at(property))); + return ret; + } catch (const std::bad_any_cast& e) { + return Status::TypeError("Any cast failed, the property type of ", property, + " is not matched ", e.what()); + } +} + template <> Result Edge::property(const std::string& property) const { auto it = list_properties_.find(property); diff --git a/cpp/src/vertices_builder.cc b/cpp/src/vertices_builder.cc index c6eb55bf6..fdc637338 100644 --- a/cpp/src/vertices_builder.cc +++ b/cpp/src/vertices_builder.cc @@ -99,6 +99,13 @@ Status VerticesBuilder::validate(const Vertex& v, IdType index, invalid_type = true; } break; + case Type::TIMESTAMP: + // timestamp is stored as int64_t + if (property.second.type() != + typeid(typename TypeToArrowType::CType::c_type)) { + invalid_type = true; + } + break; default: return Status::TypeError("Unsupported property type."); } @@ -112,6 +119,45 @@ Status VerticesBuilder::validate(const Vertex& v, IdType index, return Status::OK(); } +template +Status VerticesBuilder::tryToAppend( + const std::string& property_name, + std::shared_ptr& array) { // NOLINT + using CType = typename TypeToArrowType::CType; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + typename TypeToArrowType::BuilderType builder(pool); + for (auto& v : vertices_) { + if (v.Empty() || !v.ContainProperty(property_name)) { + RETURN_NOT_ARROW_OK(builder.AppendNull()); + } else { + RETURN_NOT_ARROW_OK( + builder.Append(std::any_cast(v.GetProperty(property_name)))); + } + } + array = builder.Finish().ValueOrDie(); + return Status::OK(); +} + +template <> +Status VerticesBuilder::tryToAppend( + const std::string& property_name, + std::shared_ptr& array) { // NOLINT + using CType = typename TypeToArrowType::CType::c_type; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + typename TypeToArrowType::BuilderType builder( + arrow::timestamp(arrow::TimeUnit::MILLI), pool); + for (auto& v : vertices_) { + if (v.Empty() || !v.ContainProperty(property_name)) { + RETURN_NOT_ARROW_OK(builder.AppendNull()); + } else { + RETURN_NOT_ARROW_OK( + builder.Append(std::any_cast(v.GetProperty(property_name)))); + } + } + array = builder.Finish().ValueOrDie(); + return Status::OK(); +} + Status VerticesBuilder::appendToArray( const std::shared_ptr& type, const std::string& property_name, std::shared_ptr& array) { // NOLINT @@ -128,31 +174,14 @@ Status VerticesBuilder::appendToArray( return tryToAppend(property_name, array); case Type::STRING: return tryToAppend(property_name, array); + case Type::TIMESTAMP: + return tryToAppend(property_name, array); default: return Status::TypeError("Unsupported property type."); } return Status::OK(); } -template -Status VerticesBuilder::tryToAppend( - const std::string& property_name, - std::shared_ptr& array) { // NOLINT - using CType = typename TypeToArrowType::CType; - arrow::MemoryPool* pool = arrow::default_memory_pool(); - typename TypeToArrowType::BuilderType builder(pool); - for (auto& v : vertices_) { - if (v.Empty() || !v.ContainProperty(property_name)) { - RETURN_NOT_ARROW_OK(builder.AppendNull()); - } else { - RETURN_NOT_ARROW_OK( - builder.Append(std::any_cast(v.GetProperty(property_name)))); - } - } - array = builder.Finish().ValueOrDie(); - return Status::OK(); -} - Result> VerticesBuilder::convertToTable() { const auto& property_groups = vertex_info_->GetPropertyGroups(); std::vector> arrays; From 0ad5e73efcd1f76a3b7f68ce46a3fa61760ebfa3 Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 11 Mar 2024 15:27:29 +0800 Subject: [PATCH 2/3] Support date data type Signed-off-by: acezen --- cpp/include/gar/fwd.h | 2 ++ cpp/include/gar/util/convert_to_arrow_type.h | 2 ++ cpp/include/gar/util/data_type.h | 15 +++++++++ cpp/src/data_type.cc | 11 +++++++ cpp/src/edges_builder.cc | 22 ++++++++++++++ cpp/src/graph.cc | 32 ++++++++++++++++++++ cpp/src/vertices_builder.cc | 21 +++++++++++++ 7 files changed, 105 insertions(+) diff --git a/cpp/include/gar/fwd.h b/cpp/include/gar/fwd.h index d0c9f444d..5c1b0f741 100644 --- a/cpp/include/gar/fwd.h +++ b/cpp/include/gar/fwd.h @@ -189,6 +189,8 @@ const std::shared_ptr& float32(); const std::shared_ptr& float64(); /// @brief Return a string DataType instance const std::shared_ptr& string(); +/// @brief Return a date DataType instance +const std::shared_ptr& date(); /// @brief Return a timestamp DataType instance const std::shared_ptr& timestamp(); /** diff --git a/cpp/include/gar/util/convert_to_arrow_type.h b/cpp/include/gar/util/convert_to_arrow_type.h index 33de5ab59..3d80f967e 100644 --- a/cpp/include/gar/util/convert_to_arrow_type.h +++ b/cpp/include/gar/util/convert_to_arrow_type.h @@ -72,6 +72,8 @@ CONVERT_TO_ARROW_TYPE(Type::STRING, std::string, arrow::LargeStringType, CONVERT_TO_ARROW_TYPE(Type::TIMESTAMP, Timestamp, arrow::TimestampType, arrow::TimestampArray, arrow::TimestampBuilder, arrow::timestamp(arrow::TimeUnit::MILLI), "timestamp") +CONVERT_TO_ARROW_TYPE(Type::DATE, Date, arrow::Date32Type, arrow::Date32Array, + arrow::Date32Builder, arrow::date32(), "date") } // namespace GAR_NAMESPACE_INTERNAL diff --git a/cpp/include/gar/util/data_type.h b/cpp/include/gar/util/data_type.h index 36e05c548..c3be0cc21 100644 --- a/cpp/include/gar/util/data_type.h +++ b/cpp/include/gar/util/data_type.h @@ -54,6 +54,9 @@ enum class Type { /** List of some logical data type */ LIST, + /** int32_t days since the UNIX epoch */ + DATE, + /** Exact timestamp encoded with int64 since UNIX epoch in milliseconds */ TIMESTAMP, @@ -141,6 +144,18 @@ class Timestamp { c_type value_; }; +// Define a Date class to represent date data type value +class Date { + public: + using c_type = int32_t; + explicit Date(c_type value) : value_(value) {} + + c_type value() const { return value_; } + + private: + c_type value_; +}; + } // namespace GAR_NAMESPACE_INTERNAL #endif // GAR_UTIL_DATA_TYPE_H_ diff --git a/cpp/src/data_type.cc b/cpp/src/data_type.cc index b490da500..dc59b8e7f 100644 --- a/cpp/src/data_type.cc +++ b/cpp/src/data_type.cc @@ -39,6 +39,8 @@ std::shared_ptr DataType::DataTypeToArrowDataType( return arrow::float64(); case Type::STRING: return arrow::large_utf8(); + case Type::DATE: + return arrow::date32(); case Type::TIMESTAMP: return arrow::timestamp(arrow::TimeUnit::MILLI); case Type::LIST: @@ -67,7 +69,11 @@ std::shared_ptr DataType::ArrowDataTypeToDataType( return string(); case arrow::Type::LARGE_STRING: return string(); + case arrow::Type::DATE32: + return date(); case arrow::Type::TIMESTAMP: + case arrow::Type::DATE64: // Date64 of Arrow is used to represent timestamp + // milliseconds return timestamp(); case arrow::Type::LIST: return list(ArrowDataTypeToDataType(type->field(0)->type())); @@ -93,6 +99,8 @@ std::string DataType::ToTypeName() const { TO_STRING_CASE(FLOAT) TO_STRING_CASE(DOUBLE) TO_STRING_CASE(STRING) + TO_STRING_CASE(DATE) + TO_STRING_CASE(TIMESTAMP) #undef TO_STRING_CASE case Type::USER_DEFINED: @@ -117,6 +125,8 @@ std::shared_ptr DataType::TypeNameToDataType(const std::string& str) { return float64(); } else if (str == "string") { return string(); + } else if (str == "date") { + return date(); } else if (str == "timestamp") { return timestamp(); } else if (str == "list") { @@ -147,6 +157,7 @@ TYPE_FACTORY(int64, Type::INT64) TYPE_FACTORY(float32, Type::FLOAT) TYPE_FACTORY(float64, Type::DOUBLE) TYPE_FACTORY(string, Type::STRING) +TYPE_FACTORY(date, Type::DATE) TYPE_FACTORY(timestamp, Type::TIMESTAMP) std::shared_ptr list(const std::shared_ptr& value_type) { diff --git a/cpp/src/edges_builder.cc b/cpp/src/edges_builder.cc index 9c3ec5354..8e7b4fd4b 100644 --- a/cpp/src/edges_builder.cc +++ b/cpp/src/edges_builder.cc @@ -213,6 +213,26 @@ Status EdgesBuilder::tryToAppend( return Status::OK(); } +template <> +Status EdgesBuilder::tryToAppend( + const std::string& property_name, + std::shared_ptr& array, // NOLINT + const std::vector& edges) { + using CType = typename TypeToArrowType::CType::c_type; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + typename TypeToArrowType::BuilderType builder(pool); + for (const auto& e : edges) { + if (e.Empty() || (!e.ContainProperty(property_name))) { + RETURN_NOT_ARROW_OK(builder.AppendNull()); + } else { + RETURN_NOT_ARROW_OK( + builder.Append(std::any_cast(e.GetProperty(property_name)))); + } + } + array = builder.Finish().ValueOrDie(); + return Status::OK(); +} + Status EdgesBuilder::appendToArray( const std::shared_ptr& type, const std::string& property_name, std::shared_ptr& array, // NOLINT @@ -230,6 +250,8 @@ Status EdgesBuilder::appendToArray( return tryToAppend(property_name, array, edges); case Type::STRING: return tryToAppend(property_name, array, edges); + case Type::DATE: + return tryToAppend(property_name, array, edges); case Type::TIMESTAMP: return tryToAppend(property_name, array, edges); default: diff --git a/cpp/src/graph.cc b/cpp/src/graph.cc index f7c267ee8..7cec1819c 100644 --- a/cpp/src/graph.cc +++ b/cpp/src/graph.cc @@ -54,6 +54,8 @@ Status TryToCastToAny(const std::shared_ptr& type, return CastToAny(array, any); case Type::STRING: return CastToAny(array, any); + case Type::DATE: + return CastToAny(array, any); case Type::TIMESTAMP: return CastToAny(array, any); default: @@ -113,6 +115,21 @@ Result Vertex::property(const std::string& property) const { } } +template <> +Result Vertex::property(const std::string& property) const { + if (properties_.find(property) == properties_.end()) { + return Status::KeyError("Property with name ", property, + " does not exist in the vertex."); + } + try { + Date ret(std::any_cast(properties_.at(property))); + return ret; + } catch (const std::bad_any_cast& e) { + return Status::TypeError("Any cast failed, the property type of ", property, + " is not matched ", e.what()); + } +} + template <> Result Vertex::property(const std::string& property) const { if (properties_.find(property) == properties_.end()) { @@ -199,6 +216,21 @@ Result Edge::property(const std::string& property) const { } } +template <> +Result Edge::property(const std::string& property) const { + if (properties_.find(property) == properties_.end()) { + return Status::KeyError("Property with name ", property, + " does not exist in the edge."); + } + try { + Date ret(std::any_cast(properties_.at(property))); + return ret; + } catch (const std::bad_any_cast& e) { + return Status::TypeError("Any cast failed, the property type of ", property, + " is not matched ", e.what()); + } +} + template <> Result Edge::property(const std::string& property) const { if (properties_.find(property) == properties_.end()) { diff --git a/cpp/src/vertices_builder.cc b/cpp/src/vertices_builder.cc index fdc637338..3ba53c888 100644 --- a/cpp/src/vertices_builder.cc +++ b/cpp/src/vertices_builder.cc @@ -158,6 +158,25 @@ Status VerticesBuilder::tryToAppend( return Status::OK(); } +template <> +Status VerticesBuilder::tryToAppend( + const std::string& property_name, + std::shared_ptr& array) { // NOLINT + using CType = typename TypeToArrowType::CType::c_type; + arrow::MemoryPool* pool = arrow::default_memory_pool(); + typename TypeToArrowType::BuilderType builder(pool); + for (auto& v : vertices_) { + if (v.Empty() || !v.ContainProperty(property_name)) { + RETURN_NOT_ARROW_OK(builder.AppendNull()); + } else { + RETURN_NOT_ARROW_OK( + builder.Append(std::any_cast(v.GetProperty(property_name)))); + } + } + array = builder.Finish().ValueOrDie(); + return Status::OK(); +} + Status VerticesBuilder::appendToArray( const std::shared_ptr& type, const std::string& property_name, std::shared_ptr& array) { // NOLINT @@ -174,6 +193,8 @@ Status VerticesBuilder::appendToArray( return tryToAppend(property_name, array); case Type::STRING: return tryToAppend(property_name, array); + case Type::DATE: + return tryToAppend(property_name, array); case Type::TIMESTAMP: return tryToAppend(property_name, array); default: From 99a1267afd644a1a959364d99cb69d6ca1117e32 Mon Sep 17 00:00:00 2001 From: acezen Date: Mon, 11 Mar 2024 16:02:23 +0800 Subject: [PATCH 3/3] Update --- cpp/src/edges_builder.cc | 7 +++++++ cpp/src/vertices_builder.cc | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/cpp/src/edges_builder.cc b/cpp/src/edges_builder.cc index 8e7b4fd4b..44946af7e 100644 --- a/cpp/src/edges_builder.cc +++ b/cpp/src/edges_builder.cc @@ -152,6 +152,13 @@ Status EdgesBuilder::validate(const Edge& e, invalid_type = true; } break; + case Type::DATE: + // date is stored as int32_t + if (property.second.type() != + typeid(typename TypeToArrowType::CType::c_type)) { + invalid_type = true; + } + break; case Type::TIMESTAMP: // timestamp is stored as int64_t if (property.second.type() != diff --git a/cpp/src/vertices_builder.cc b/cpp/src/vertices_builder.cc index 3ba53c888..a84561340 100644 --- a/cpp/src/vertices_builder.cc +++ b/cpp/src/vertices_builder.cc @@ -99,6 +99,13 @@ Status VerticesBuilder::validate(const Vertex& v, IdType index, invalid_type = true; } break; + case Type::DATE: + // date is stored as int32_t + if (property.second.type() != + typeid(typename TypeToArrowType::CType::c_type)) { + invalid_type = true; + } + break; case Type::TIMESTAMP: // timestamp is stored as int64_t if (property.second.type() !=