Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[C++][Improvement] Add validation of different levels for builders in C++ library #181

Merged
merged 3 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 30 additions & 20 deletions cpp/include/gar/writer/edges_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,17 @@ class EdgesBuilder {
* @param prefix The absolute prefix.
* @param adj_list_type The adj list type of the edges.
* @param num_vertices The total number of vertices for source or destination.
* @param validate_level The validate level, with no validate by default.
*/
explicit EdgesBuilder(const EdgeInfo edge_info, const std::string& prefix,
AdjListType adj_list_type, IdType num_vertices)
explicit EdgesBuilder(
const EdgeInfo edge_info, const std::string& prefix,
AdjListType adj_list_type, IdType num_vertices,
const ValidateLevel& validate_level = ValidateLevel::no_validate)
: edge_info_(edge_info),
prefix_(prefix),
adj_list_type_(adj_list_type),
num_vertices_(num_vertices) {
num_vertices_(num_vertices),
validate_level_(validate_level) {
edges_.clear();
num_edges_ = 0;
is_saved_ = false;
Expand All @@ -175,28 +179,32 @@ class EdgesBuilder {
}
}

/**
* @brief Set the validate level.
*
* @param validate_level The validate level to set.
*/
inline void SetValidateLevel(const ValidateLevel& validate_level) {
validate_level_ = validate_level;
}

/**
 * @brief Read the validate level stored in this builder.
 *
 * @return The validate level currently held by this builder.
 */
inline ValidateLevel GetValidateLevel() const {
  return this->validate_level_;
}

/**
* @brief Check if adding an edge is allowed.
*
* @param e The edge to add.
* @param validate_level The validate level for this operation,
* which is the builder's validate level by default.
* @return Status: ok, or a Status::InvalidOperation / Status::TypeError error.
*/
Status Validate(const Edge& e) {
// can not add new edges
if (is_saved_) {
return Status::InvalidOperation("can not add new edges after dumping");
}
// invalid adj list type
if (!edge_info_.ContainAdjList(adj_list_type_)) {
return Status::InvalidOperation("invalid adj list type");
}
// contain invalid properties
for (auto& property : e.GetProperties()) {
if (!edge_info_.ContainProperty(property.first))
return Status::InvalidOperation("invalid property");
}
return Status::OK();
}
Status Validate(const Edge& e, ValidateLevel validate_level =
ValidateLevel::default_validate) const;

/**
* @brief Get the vertex chunk index of a given edge.
Expand Down Expand Up @@ -249,7 +257,8 @@ class EdgesBuilder {
*/
Status Dump() {
// construct the writer
EdgeChunkWriter writer(edge_info_, prefix_, adj_list_type_);
EdgeChunkWriter writer(edge_info_, prefix_, adj_list_type_,
validate_level_);
// construct empty edge collections for vertex chunks without edges
IdType num_vertex_chunks =
(num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_;
Expand Down Expand Up @@ -372,6 +381,7 @@ class EdgesBuilder {
IdType num_vertices_;
IdType num_edges_;
bool is_saved_;
ValidateLevel validate_level_;
};

} // namespace builder
Expand Down
55 changes: 30 additions & 25 deletions cpp/include/gar/writer/vertices_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,45 +129,49 @@ class VerticesBuilder {
* @param vertex_info The vertex info that describes the vertex type.
* @param prefix The absolute prefix.
* @param start_vertex_index The start index of the vertices collection.
* @param validate_level The validate level, with no validate by default.
*/
explicit VerticesBuilder(const VertexInfo& vertex_info,
const std::string& prefix,
IdType start_vertex_index = 0)
explicit VerticesBuilder(
const VertexInfo& vertex_info, const std::string& prefix,
IdType start_vertex_index = 0,
const ValidateLevel& validate_level = ValidateLevel::no_validate)
: vertex_info_(vertex_info),
prefix_(prefix),
start_vertex_index_(start_vertex_index) {
start_vertex_index_(start_vertex_index),
validate_level_(validate_level) {
vertices_.clear();
num_vertices_ = 0;
is_saved_ = false;
}

/**
* @brief Set the validate level.
*
* @param validate_level The validate level to set.
*/
inline void SetValidateLevel(const ValidateLevel& validate_level) {
validate_level_ = validate_level;
}

/**
 * @brief Read the validate level stored in this builder.
 *
 * @return The validate level currently held by this builder.
 */
inline ValidateLevel GetValidateLevel() const {
  return this->validate_level_;
}

/**
* @brief Check if adding a vertex with the given index is allowed.
*
* @param v The vertex to add.
* @param index The given index, -1 means the next unused index.
* @param validate_level The validate level for this operation,
* which is the builder's validate level by default.
* @return Status: ok, or a Status::InvalidOperation / Status::TypeError error.
*/
Status Validate(const Vertex& v, IdType index = -1) const {
// can not add new vertices
if (is_saved_) {
return Status::InvalidOperation("can not add new vertices after dumping");
}
// start vertex index must be aligned with the chunk size
if (start_vertex_index_ % vertex_info_.GetChunkSize() != 0) {
return Status::InvalidOperation("invalid start vertex index");
}
// vertex index must larger than start index
if (index != -1 && index < start_vertex_index_)
return Status::InvalidOperation(
"vertex index must larger than start index");
// contain invalid properties
for (auto& property : v.GetProperties()) {
if (!vertex_info_.ContainProperty(property.first))
return Status::InvalidOperation("invalid property");
}
return Status::OK();
}
Status Validate(
const Vertex& v, IdType index = -1,
ValidateLevel validate_level = ValidateLevel::default_validate) const;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the changes. I have some questions about the validate level here:

  1. What's the difference between this validate level and the builder's validate level?
    If they are the same, why are the default values different?
  2. It seems that this method could set a different validate level for each vertex, but isn't it a little weird for different vertices to have different validate levels in the same builder?

Copy link
Contributor Author

@lixueclaire lixueclaire Jun 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

default_validate means using the validate_level of the builder, which is a global setting and will be passed to construct the low-level writer. On the other hand, we also support specifying a validate_level that is only used in the function for adding a vertex.

We provide different levels to be more flexible. For example, when a group of vertices is generated together and shares the same schema, the user may only need to validate the first vertex, and the others can then be added safely.


/**
* @brief Add a vertex with the given index.
Expand Down Expand Up @@ -207,7 +211,7 @@ class VerticesBuilder {
*/
Status Dump() {
// construct the writer
VertexPropertyWriter writer(vertex_info_, prefix_);
VertexPropertyWriter writer(vertex_info_, prefix_, validate_level_);
IdType start_chunk_index =
start_vertex_index_ / vertex_info_.GetChunkSize();
// convert to table
Expand Down Expand Up @@ -257,6 +261,7 @@ class VerticesBuilder {
IdType start_vertex_index_;
IdType num_vertices_;
bool is_saved_;
ValidateLevel validate_level_;
};

} // namespace builder
Expand Down
85 changes: 85 additions & 0 deletions cpp/src/edges_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,91 @@ limitations under the License.
namespace GAR_NAMESPACE_INTERNAL {
namespace builder {

/**
 * @brief Check if adding an edge is allowed.
 *
 * With ValidateLevel::default_validate the builder's own validate level is
 * used. ValidateLevel::no_validate accepts the edge without any check. Any
 * other level checks that the builder has not been dumped yet and that the
 * edge info contains the builder's adj list type.
 * ValidateLevel::strong_validate additionally checks every property of the
 * edge: its name must be contained in the edge info and the C++ type of the
 * stored value must match the declared property type.
 *
 * @param e The edge to add.
 * @param validate_level The validate level for this operation.
 * @return Status::OK on success, Status::InvalidOperation if a state or
 * property-name check fails, Status::TypeError if a property value has the
 * wrong type or the declared type is not handled here.
 */
Status EdgesBuilder::Validate(const Edge& e,
                              ValidateLevel validate_level) const {
  // use the builder's validate level
  if (validate_level == ValidateLevel::default_validate) {
    validate_level = validate_level_;
  }
  // no validate
  if (validate_level == ValidateLevel::no_validate) {
    return Status::OK();
  }

  // weak validate
  // can not add new edges after dumping
  if (is_saved_) {
    return Status::InvalidOperation("can not add new edges after dumping");
  }
  // invalid adj list type
  if (!edge_info_.ContainAdjList(adj_list_type_)) {
    return Status::InvalidOperation(
        "the adj list type " +
        std::string(AdjListTypeToString(adj_list_type_)) +
        " does not exist in the edge info");
  }

  // everything below belongs to the strong validate level only
  if (validate_level != ValidateLevel::strong_validate) {
    return Status::OK();
  }

  // strong validate
  for (const auto& property : e.GetProperties()) {
    // check if the property is contained
    if (!edge_info_.ContainProperty(property.first)) {
      return Status::InvalidOperation(
          "invalid property name: " + property.first +
          ", which is not contained in the edge info");
    }
    // check if the property type is correct; the lookup is only done after
    // ContainProperty succeeded for this name
    auto type = edge_info_.GetPropertyType(property.first).value();
    bool invalid_type = false;
    switch (type.id()) {
    case Type::BOOL:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::BOOL>::CType);
      break;
    case Type::INT32:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::INT32>::CType);
      break;
    case Type::INT64:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::INT64>::CType);
      break;
    case Type::FLOAT:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::FLOAT>::CType);
      break;
    case Type::DOUBLE:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::DOUBLE>::CType);
      break;
    case Type::STRING:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::STRING>::CType);
      break;
    default:
      return Status::TypeError("unsupported property type");
    }
    if (invalid_type) {
      std::string err_msg =
          "invalid data type for property: " + property.first +
          ", defined as " + type.ToTypeName() + ", but got " +
          property.second.type().name();
      return Status::TypeError(err_msg);
    }
  }
  return Status::OK();
}

Status EdgesBuilder::appendToArray(
const DataType& type, const std::string& property_name,
std::shared_ptr<arrow::Array>& array, // NOLINT
Expand Down
88 changes: 88 additions & 0 deletions cpp/src/vertices_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,94 @@ limitations under the License.
namespace GAR_NAMESPACE_INTERNAL {
namespace builder {

/**
 * @brief Check if adding a vertex with the given index is allowed.
 *
 * With ValidateLevel::default_validate the builder's own validate level is
 * used. ValidateLevel::no_validate accepts the vertex without any check. Any
 * other level checks that the builder has not been dumped yet, that the start
 * vertex index is a multiple of the chunk size, and that an explicitly given
 * index is not smaller than the start vertex index.
 * ValidateLevel::strong_validate additionally checks every property of the
 * vertex: its name must be contained in the vertex info and the C++ type of
 * the stored value must match the declared property type.
 *
 * @param v The vertex to add.
 * @param index The given index; -1 skips the index range check.
 * @param validate_level The validate level for this operation.
 * @return Status::OK on success, Status::InvalidOperation if a state, index
 * or property-name check fails, Status::TypeError if a property value has the
 * wrong type or the declared type is not handled here.
 */
Status VerticesBuilder::Validate(const Vertex& v, IdType index,
                                 ValidateLevel validate_level) const {
  // use the builder's validate level
  if (validate_level == ValidateLevel::default_validate) {
    validate_level = validate_level_;
  }
  // no validate
  if (validate_level == ValidateLevel::no_validate) {
    return Status::OK();
  }

  // weak validate
  // can not add new vertices after dumping
  if (is_saved_) {
    return Status::InvalidOperation("can not add new vertices after dumping");
  }
  // the start vertex index must be aligned with the chunk size
  if (start_vertex_index_ % vertex_info_.GetChunkSize() != 0) {
    return Status::InvalidOperation(
        "the start vertex index must be aligned "
        "with the chunk size");
  }
  // an explicit index equal to the start index is accepted; only smaller
  // indices are rejected
  if (index != -1 && index < start_vertex_index_) {
    return Status::InvalidOperation(
        "the vertex index must not be smaller than the start index");
  }

  // everything below belongs to the strong validate level only
  if (validate_level != ValidateLevel::strong_validate) {
    return Status::OK();
  }

  // strong validate
  for (const auto& property : v.GetProperties()) {
    // check if the property is contained
    if (!vertex_info_.ContainProperty(property.first)) {
      return Status::InvalidOperation(
          "invalid property name: " + property.first +
          ", which is not contained in the vertex info");
    }
    // check if the property type is correct; the lookup is only done after
    // ContainProperty succeeded for this name
    auto type = vertex_info_.GetPropertyType(property.first).value();
    bool invalid_type = false;
    switch (type.id()) {
    case Type::BOOL:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::BOOL>::CType);
      break;
    case Type::INT32:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::INT32>::CType);
      break;
    case Type::INT64:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::INT64>::CType);
      break;
    case Type::FLOAT:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::FLOAT>::CType);
      break;
    case Type::DOUBLE:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::DOUBLE>::CType);
      break;
    case Type::STRING:
      invalid_type = property.second.type() !=
                     typeid(typename ConvertToArrowType<Type::STRING>::CType);
      break;
    default:
      return Status::TypeError("unsupported property type");
    }
    if (invalid_type) {
      std::string err_msg =
          "invalid data type for property: " + property.first +
          ", defined as " + type.ToTypeName() + ", but got " +
          property.second.type().name();
      return Status::TypeError(err_msg);
    }
  }
  return Status::OK();
}

Status VerticesBuilder::appendToArray(
const DataType& type, const std::string& property_name,
std::shared_ptr<arrow::Array>& array) { // NOLINT
Expand Down
Loading