Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feat][C++] Support Date and Timestamp data type #398

Merged
merged 3 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cpp/include/gar/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,10 @@ const std::shared_ptr<DataType>& float32();
const std::shared_ptr<DataType>& float64();
/// @brief Return a string DataType instance
const std::shared_ptr<DataType>& string();
/// @brief Return a date DataType instance
const std::shared_ptr<DataType>& date();
/// @brief Return a timestamp DataType instance
const std::shared_ptr<DataType>& timestamp();
/**
* @brief Return a list DataType instance
*
Expand Down
5 changes: 5 additions & 0 deletions cpp/include/gar/util/convert_to_arrow_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ CONVERT_TO_ARROW_TYPE(Type::DOUBLE, double, arrow::DoubleType,
CONVERT_TO_ARROW_TYPE(Type::STRING, std::string, arrow::LargeStringType,
arrow::LargeStringArray, arrow::LargeStringBuilder,
arrow::large_utf8(), "string")
// Type::TIMESTAMP: the C++ value type is the Timestamp wrapper; on the Arrow
// side it is paired with TimestampType/TimestampArray/TimestampBuilder and an
// arrow::timestamp data type using millisecond units. Type name: "timestamp".
CONVERT_TO_ARROW_TYPE(Type::TIMESTAMP, Timestamp, arrow::TimestampType,
arrow::TimestampArray, arrow::TimestampBuilder,
arrow::timestamp(arrow::TimeUnit::MILLI), "timestamp")
// Type::DATE: the C++ value type is the Date wrapper; on the Arrow side it is
// paired with the 32-bit date classes (Date32Type/Date32Array/Date32Builder)
// and arrow::date32(). Type name: "date".
CONVERT_TO_ARROW_TYPE(Type::DATE, Date, arrow::Date32Type, arrow::Date32Array,
arrow::Date32Builder, arrow::date32(), "date")

} // namespace GAR_NAMESPACE_INTERNAL

Expand Down
31 changes: 31 additions & 0 deletions cpp/include/gar/util/data_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ enum class Type {
/** List of some logical data type */
LIST,

/** int32_t days since the UNIX epoch */
DATE,

/** Exact timestamp encoded with int64 since UNIX epoch in milliseconds */
TIMESTAMP,

/** User-defined data type */
USER_DEFINED,

Expand Down Expand Up @@ -125,6 +131,31 @@ class DataType {
std::shared_ptr<DataType> child_;
std::string user_defined_type_name_;
}; // struct DataType

/// @brief Strongly typed wrapper around a raw timestamp value.
///
/// The wrapped integer is stored verbatim; no validation or unit conversion
/// is performed by this class.
class Timestamp {
 public:
  /// Underlying integral representation of a timestamp.
  using c_type = int64_t;

  /// @brief Wrap a raw timestamp value (explicit to avoid silent conversions).
  explicit Timestamp(c_type value) : value_{value} {}

  /// @brief Return the raw value this object was constructed with.
  c_type value() const { return value_; }

 private:
  c_type value_;
};

/// @brief Strongly typed wrapper around a raw date value.
///
/// The wrapped integer is stored verbatim; no validation or unit conversion
/// is performed by this class.
class Date {
 public:
  /// Underlying integral representation of a date.
  using c_type = int32_t;

  /// @brief Wrap a raw date value (explicit to avoid silent conversions).
  explicit Date(c_type value) : value_{value} {}

  /// @brief Return the raw value this object was constructed with.
  c_type value() const { return value_; }

 private:
  c_type value_;
};

} // namespace GAR_NAMESPACE_INTERNAL

#endif // GAR_UTIL_DATA_TYPE_H_
18 changes: 18 additions & 0 deletions cpp/src/data_type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ std::shared_ptr<arrow::DataType> DataType::DataTypeToArrowDataType(
return arrow::float64();
case Type::STRING:
return arrow::large_utf8();
case Type::DATE:
return arrow::date32();
case Type::TIMESTAMP:
return arrow::timestamp(arrow::TimeUnit::MILLI);
case Type::LIST:
return arrow::list(DataTypeToArrowDataType(type->child_));
default:
Expand All @@ -65,6 +69,12 @@ std::shared_ptr<DataType> DataType::ArrowDataTypeToDataType(
return string();
case arrow::Type::LARGE_STRING:
return string();
case arrow::Type::DATE32:
return date();
case arrow::Type::TIMESTAMP:
case arrow::Type::DATE64: // Date64 of Arrow is used to represent timestamp
// milliseconds
return timestamp();
case arrow::Type::LIST:
return list(ArrowDataTypeToDataType(type->field(0)->type()));
default:
Expand All @@ -89,6 +99,8 @@ std::string DataType::ToTypeName() const {
TO_STRING_CASE(FLOAT)
TO_STRING_CASE(DOUBLE)
TO_STRING_CASE(STRING)
TO_STRING_CASE(DATE)
TO_STRING_CASE(TIMESTAMP)

#undef TO_STRING_CASE
case Type::USER_DEFINED:
Expand All @@ -113,6 +125,10 @@ std::shared_ptr<DataType> DataType::TypeNameToDataType(const std::string& str) {
return float64();
} else if (str == "string") {
return string();
} else if (str == "date") {
return date();
} else if (str == "timestamp") {
return timestamp();
} else if (str == "list<int32>") {
return list(int32());
} else if (str == "list<int64>") {
Expand Down Expand Up @@ -141,6 +157,8 @@ TYPE_FACTORY(int64, Type::INT64)
TYPE_FACTORY(float32, Type::FLOAT)
TYPE_FACTORY(float64, Type::DOUBLE)
TYPE_FACTORY(string, Type::STRING)
TYPE_FACTORY(date, Type::DATE)
TYPE_FACTORY(timestamp, Type::TIMESTAMP)

std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type) {
return std::make_shared<DataType>(Type::LIST, value_type);
Expand Down
99 changes: 79 additions & 20 deletions cpp/src/edges_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,20 @@ Status EdgesBuilder::validate(const Edge& e,
invalid_type = true;
}
break;
case Type::DATE:
// date is stored as int32_t
if (property.second.type() !=
typeid(typename TypeToArrowType<Type::DATE>::CType::c_type)) {
invalid_type = true;
}
break;
case Type::TIMESTAMP:
// timestamp is stored as int64_t
if (property.second.type() !=
typeid(typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type)) {
invalid_type = true;
}
break;
default:
return Status::TypeError("Unsupported property type.");
}
Expand All @@ -165,6 +179,67 @@ Status EdgesBuilder::validate(const Edge& e,
return Status::OK();
}

/**
 * @brief Build an Arrow array holding one property column for a set of edges.
 *
 * For every edge, a null is appended when the edge is empty or does not
 * contain the property; otherwise the property value is extracted as
 * TypeToArrowType<type>::CType and appended.
 *
 * @tparam type The property type selecting the Arrow builder and C++ type.
 * @param property_name Name of the property to collect.
 * @param array Output: receives the finished Arrow array on success.
 * @param edges Edges whose property values are collected, in order.
 * @return Status::OK() on success, or an error status when an Arrow builder
 *         operation (append or finish) fails.
 */
template <Type type>
Status EdgesBuilder::tryToAppend(
    const std::string& property_name,
    std::shared_ptr<arrow::Array>& array,  // NOLINT
    const std::vector<Edge>& edges) {
  using CType = typename TypeToArrowType<type>::CType;
  arrow::MemoryPool* pool = arrow::default_memory_pool();
  typename TypeToArrowType<type>::BuilderType builder(pool);
  for (const auto& e : edges) {
    if (e.Empty() || (!e.ContainProperty(property_name))) {
      RETURN_NOT_ARROW_OK(builder.AppendNull());
    } else {
      RETURN_NOT_ARROW_OK(
          builder.Append(std::any_cast<CType>(e.GetProperty(property_name))));
    }
  }
  // Report a failed Finish() through the returned Status instead of aborting
  // the process via ValueOrDie(). The status is bound to a local first so the
  // builder is finished exactly once, however the macro expands its argument.
  const auto finish_status = builder.Finish(&array);
  RETURN_NOT_ARROW_OK(finish_status);
  return Status::OK();
}

/**
 * @brief Specialization for timestamp properties.
 *
 * Property values are read as the raw Timestamp::c_type integer (not as a
 * Timestamp object) and appended to a builder constructed with an explicit
 * millisecond-unit Arrow timestamp type. Empty edges and edges lacking the
 * property contribute a null.
 *
 * @param property_name Name of the property to collect.
 * @param array Output: receives the finished Arrow array on success.
 * @param edges Edges whose property values are collected, in order.
 * @return Status::OK() on success, or an error status when an Arrow builder
 *         operation (append or finish) fails.
 */
template <>
Status EdgesBuilder::tryToAppend<Type::TIMESTAMP>(
    const std::string& property_name,
    std::shared_ptr<arrow::Array>& array,  // NOLINT
    const std::vector<Edge>& edges) {
  using CType = typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type;
  arrow::MemoryPool* pool = arrow::default_memory_pool();
  // Unlike the generic case, this builder is given the concrete data type
  // (millisecond unit) in addition to the memory pool.
  typename TypeToArrowType<Type::TIMESTAMP>::BuilderType builder(
      arrow::timestamp(arrow::TimeUnit::MILLI), pool);
  for (const auto& e : edges) {
    if (e.Empty() || (!e.ContainProperty(property_name))) {
      RETURN_NOT_ARROW_OK(builder.AppendNull());
    } else {
      RETURN_NOT_ARROW_OK(
          builder.Append(std::any_cast<CType>(e.GetProperty(property_name))));
    }
  }
  // Report a failed Finish() through the returned Status instead of aborting
  // the process via ValueOrDie(). The status is bound to a local first so the
  // builder is finished exactly once, however the macro expands its argument.
  const auto finish_status = builder.Finish(&array);
  RETURN_NOT_ARROW_OK(finish_status);
  return Status::OK();
}

/**
 * @brief Specialization for date properties.
 *
 * Property values are read as the raw Date::c_type integer (not as a Date
 * object) and appended to the Arrow builder registered for Type::DATE. Empty
 * edges and edges lacking the property contribute a null.
 *
 * @param property_name Name of the property to collect.
 * @param array Output: receives the finished Arrow array on success.
 * @param edges Edges whose property values are collected, in order.
 * @return Status::OK() on success, or an error status when an Arrow builder
 *         operation (append or finish) fails.
 */
template <>
Status EdgesBuilder::tryToAppend<Type::DATE>(
    const std::string& property_name,
    std::shared_ptr<arrow::Array>& array,  // NOLINT
    const std::vector<Edge>& edges) {
  using CType = typename TypeToArrowType<Type::DATE>::CType::c_type;
  arrow::MemoryPool* pool = arrow::default_memory_pool();
  typename TypeToArrowType<Type::DATE>::BuilderType builder(pool);
  for (const auto& e : edges) {
    if (e.Empty() || (!e.ContainProperty(property_name))) {
      RETURN_NOT_ARROW_OK(builder.AppendNull());
    } else {
      RETURN_NOT_ARROW_OK(
          builder.Append(std::any_cast<CType>(e.GetProperty(property_name))));
    }
  }
  // Report a failed Finish() through the returned Status instead of aborting
  // the process via ValueOrDie(). The status is bound to a local first so the
  // builder is finished exactly once, however the macro expands its argument.
  const auto finish_status = builder.Finish(&array);
  RETURN_NOT_ARROW_OK(finish_status);
  return Status::OK();
}

Status EdgesBuilder::appendToArray(
const std::shared_ptr<DataType>& type, const std::string& property_name,
std::shared_ptr<arrow::Array>& array, // NOLINT
Expand All @@ -182,32 +257,16 @@ Status EdgesBuilder::appendToArray(
return tryToAppend<Type::DOUBLE>(property_name, array, edges);
case Type::STRING:
return tryToAppend<Type::STRING>(property_name, array, edges);
case Type::DATE:
return tryToAppend<Type::DATE>(property_name, array, edges);
case Type::TIMESTAMP:
return tryToAppend<Type::TIMESTAMP>(property_name, array, edges);
default:
return Status::TypeError("Unsupported property type.");
}
return Status::OK();
}

/**
 * @brief Build an Arrow array holding one property column for a set of edges.
 *
 * For every edge, a null is appended when the edge is empty or does not
 * contain the property; otherwise the property value is extracted as
 * TypeToArrowType<type>::CType and appended.
 *
 * @tparam type The property type selecting the Arrow builder and C++ type.
 * @param property_name Name of the property to collect.
 * @param array Output: receives the finished Arrow array on success.
 * @param edges Edges whose property values are collected, in order.
 * @return Status::OK() on success, or an error status when an Arrow builder
 *         operation (append or finish) fails.
 */
template <Type type>
Status EdgesBuilder::tryToAppend(
    const std::string& property_name,
    std::shared_ptr<arrow::Array>& array,  // NOLINT
    const std::vector<Edge>& edges) {
  using CType = typename TypeToArrowType<type>::CType;
  arrow::MemoryPool* pool = arrow::default_memory_pool();
  typename TypeToArrowType<type>::BuilderType builder(pool);
  for (const auto& e : edges) {
    if (e.Empty() || (!e.ContainProperty(property_name))) {
      RETURN_NOT_ARROW_OK(builder.AppendNull());
    } else {
      RETURN_NOT_ARROW_OK(
          builder.Append(std::any_cast<CType>(e.GetProperty(property_name))));
    }
  }
  // Report a failed Finish() through the returned Status instead of aborting
  // the process via ValueOrDie(). The status is bound to a local first so the
  // builder is finished exactly once, however the macro expands its argument.
  const auto finish_status = builder.Finish(&array);
  RETURN_NOT_ARROW_OK(finish_status);
  return Status::OK();
}

Status EdgesBuilder::tryToAppend(
int src_or_dest,
std::shared_ptr<arrow::Array>& array, // NOLINT
Expand Down
64 changes: 64 additions & 0 deletions cpp/src/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ Status TryToCastToAny(const std::shared_ptr<DataType>& type,
return CastToAny<Type::DOUBLE>(array, any);
case Type::STRING:
return CastToAny<Type::STRING>(array, any);
case Type::DATE:
return CastToAny<Type::DATE>(array, any);
case Type::TIMESTAMP:
return CastToAny<Type::TIMESTAMP>(array, any);
default:
return Status::TypeError("Unsupported type.");
}
Expand Down Expand Up @@ -111,6 +115,36 @@ Result<T> Vertex::property(const std::string& property) const {
}
}

/**
 * @brief Read a vertex property as a Date.
 *
 * The stored value is expected to hold the raw Date::c_type integer, which is
 * wrapped into a Date on return.
 *
 * @param property Name of the property to read.
 * @return The Date on success; a KeyError status when the property is absent;
 *         a TypeError status when the stored value is not a Date::c_type.
 */
template <>
Result<Date> Vertex::property(const std::string& property) const {
  // Look the property up once and reuse the iterator instead of searching
  // again with at().
  const auto it = properties_.find(property);
  if (it == properties_.end()) {
    return Status::KeyError("Property with name ", property,
                            " does not exist in the vertex.");
  }
  try {
    Date ret(std::any_cast<Date::c_type>(it->second));
    return ret;
  } catch (const std::bad_any_cast& e) {
    return Status::TypeError("Any cast failed, the property type of ", property,
                             " is not matched ", e.what());
  }
}

/**
 * @brief Read a vertex property as a Timestamp.
 *
 * The stored value is expected to hold the raw Timestamp::c_type integer,
 * which is wrapped into a Timestamp on return.
 *
 * @param property Name of the property to read.
 * @return The Timestamp on success; a KeyError status when the property is
 *         absent; a TypeError status when the stored value is not a
 *         Timestamp::c_type.
 */
template <>
Result<Timestamp> Vertex::property(const std::string& property) const {
  // Look the property up once and reuse the iterator instead of searching
  // again with at().
  const auto it = properties_.find(property);
  if (it == properties_.end()) {
    return Status::KeyError("Property with name ", property,
                            " does not exist in the vertex.");
  }
  try {
    Timestamp ret(std::any_cast<Timestamp::c_type>(it->second));
    return ret;
  } catch (const std::bad_any_cast& e) {
    return Status::TypeError("Any cast failed, the property type of ", property,
                             " is not matched ", e.what());
  }
}

template <>
Result<StringArray> Vertex::property(const std::string& property) const {
auto it = list_properties_.find(property);
Expand Down Expand Up @@ -182,6 +216,36 @@ Result<T> Edge::property(const std::string& property) const {
}
}

/**
 * @brief Read an edge property as a Date.
 *
 * The stored value is expected to hold the raw Date::c_type integer, which is
 * wrapped into a Date on return.
 *
 * @param property Name of the property to read.
 * @return The Date on success; a KeyError status when the property is absent;
 *         a TypeError status when the stored value is not a Date::c_type.
 */
template <>
Result<Date> Edge::property(const std::string& property) const {
  // Look the property up once and reuse the iterator instead of searching
  // again with at().
  const auto it = properties_.find(property);
  if (it == properties_.end()) {
    return Status::KeyError("Property with name ", property,
                            " does not exist in the edge.");
  }
  try {
    Date ret(std::any_cast<Date::c_type>(it->second));
    return ret;
  } catch (const std::bad_any_cast& e) {
    return Status::TypeError("Any cast failed, the property type of ", property,
                             " is not matched ", e.what());
  }
}

/**
 * @brief Read an edge property as a Timestamp.
 *
 * The stored value is expected to hold the raw Timestamp::c_type integer,
 * which is wrapped into a Timestamp on return.
 *
 * @param property Name of the property to read.
 * @return The Timestamp on success; a KeyError status when the property is
 *         absent; a TypeError status when the stored value is not a
 *         Timestamp::c_type.
 */
template <>
Result<Timestamp> Edge::property(const std::string& property) const {
  // Look the property up once and reuse the iterator instead of searching
  // again with at().
  const auto it = properties_.find(property);
  if (it == properties_.end()) {
    return Status::KeyError("Property with name ", property,
                            " does not exist in the edge.");
  }
  try {
    Timestamp ret(std::any_cast<Timestamp::c_type>(it->second));
    return ret;
  } catch (const std::bad_any_cast& e) {
    return Status::TypeError("Any cast failed, the property type of ", property,
                             " is not matched ", e.what());
  }
}

template <>
Result<StringArray> Edge::property(const std::string& property) const {
auto it = list_properties_.find(property);
Expand Down
Loading
Loading