diff --git a/cpp/include/gar/writer/edges_builder.h b/cpp/include/gar/writer/edges_builder.h index ff20f156d..16eb06095 100644 --- a/cpp/include/gar/writer/edges_builder.h +++ b/cpp/include/gar/writer/edges_builder.h @@ -147,13 +147,17 @@ class EdgesBuilder { * @param prefix The absolute prefix. * @param adj_list_type The adj list type of the edges. * @param num_vertices The total number of vertices for source or destination. + * @param validate_level The validate level, with no validate by default. */ - explicit EdgesBuilder(const EdgeInfo edge_info, const std::string& prefix, - AdjListType adj_list_type, IdType num_vertices) + explicit EdgesBuilder( + const EdgeInfo edge_info, const std::string& prefix, + AdjListType adj_list_type, IdType num_vertices, + const ValidateLevel& validate_level = ValidateLevel::no_validate) : edge_info_(edge_info), prefix_(prefix), adj_list_type_(adj_list_type), - num_vertices_(num_vertices) { + num_vertices_(num_vertices), + validate_level_(validate_level) { edges_.clear(); num_edges_ = 0; is_saved_ = false; @@ -175,28 +179,32 @@ class EdgesBuilder { } } + /** + * @brief Set the validate level. + * + * @param validate_level The validate level to set. + */ + inline void SetValidateLevel(const ValidateLevel& validate_level) { + validate_level_ = validate_level; + } + + /** + * @brief Get the validate level. + * + * @return The validate level of this builder. + */ + inline ValidateLevel GetValidateLevel() const { return validate_level_; } + /** * @brief Check if adding an edge is allowed. * * @param e The edge to add. + * @param validate_level The validate level for this operation, + * which is the builder's validate level by default. * @return Status: ok or status::InvalidOperation error. 
*/ - Status Validate(const Edge& e) { - // can not add new edges - if (is_saved_) { - return Status::InvalidOperation("can not add new edges after dumping"); - } - // invalid adj list type - if (!edge_info_.ContainAdjList(adj_list_type_)) { - return Status::InvalidOperation("invalid adj list type"); - } - // contain invalid properties - for (auto& property : e.GetProperties()) { - if (!edge_info_.ContainProperty(property.first)) - return Status::InvalidOperation("invalid property"); - } - return Status::OK(); - } + Status Validate(const Edge& e, ValidateLevel validate_level = + ValidateLevel::default_validate) const; /** * @brief Get the vertex chunk index of a given edge. @@ -249,7 +257,8 @@ class EdgesBuilder { */ Status Dump() { // construct the writer - EdgeChunkWriter writer(edge_info_, prefix_, adj_list_type_); + EdgeChunkWriter writer(edge_info_, prefix_, adj_list_type_, + validate_level_); // construct empty edge collections for vertex chunks without edges IdType num_vertex_chunks = (num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_; @@ -372,6 +381,7 @@ class EdgesBuilder { IdType num_vertices_; IdType num_edges_; bool is_saved_; + ValidateLevel validate_level_; }; } // namespace builder diff --git a/cpp/include/gar/writer/vertices_builder.h b/cpp/include/gar/writer/vertices_builder.h index fa06c4de0..97bf8d1f9 100644 --- a/cpp/include/gar/writer/vertices_builder.h +++ b/cpp/include/gar/writer/vertices_builder.h @@ -129,45 +129,49 @@ class VerticesBuilder { * @param vertex_info The vertex info that describes the vertex type. * @param prefix The absolute prefix. * @param start_vertex_index The start index of the vertices collection. + * @param validate_level The validate level, with no validate by default. 
*/ - explicit VerticesBuilder(const VertexInfo& vertex_info, - const std::string& prefix, - IdType start_vertex_index = 0) + explicit VerticesBuilder( + const VertexInfo& vertex_info, const std::string& prefix, + IdType start_vertex_index = 0, + const ValidateLevel& validate_level = ValidateLevel::no_validate) : vertex_info_(vertex_info), prefix_(prefix), - start_vertex_index_(start_vertex_index) { + start_vertex_index_(start_vertex_index), + validate_level_(validate_level) { vertices_.clear(); num_vertices_ = 0; is_saved_ = false; } + /** + * @brief Set the validate level. + * + * @param validate_level The validate level to set. + */ + inline void SetValidateLevel(const ValidateLevel& validate_level) { + validate_level_ = validate_level; + } + + /** + * @brief Get the validate level. + * + * @return The validate level of this builder. + */ + inline ValidateLevel GetValidateLevel() const { return validate_level_; } + /** * @brief Check if adding a vertex with the given index is allowed. * * @param v The vertex to add. * @param index The given index, -1 means the next unused index. + * @param validate_level The validate level for this operation, + * which is the builder's validate level by default. * @return Status: ok or Status::InvalidOperation error. 
*/ - Status Validate(const Vertex& v, IdType index = -1) const { - // can not add new vertices - if (is_saved_) { - return Status::InvalidOperation("can not add new vertices after dumping"); - } - // start vertex index must be aligned with the chunk size - if (start_vertex_index_ % vertex_info_.GetChunkSize() != 0) { - return Status::InvalidOperation("invalid start vertex index"); - } - // vertex index must larger than start index - if (index != -1 && index < start_vertex_index_) - return Status::InvalidOperation( - "vertex index must larger than start index"); - // contain invalid properties - for (auto& property : v.GetProperties()) { - if (!vertex_info_.ContainProperty(property.first)) - return Status::InvalidOperation("invalid property"); - } - return Status::OK(); - } + Status Validate( + const Vertex& v, IdType index = -1, + ValidateLevel validate_level = ValidateLevel::default_validate) const; /** * @brief Add a vertex with the given index. @@ -207,7 +211,7 @@ class VerticesBuilder { */ Status Dump() { // construct the writer - VertexPropertyWriter writer(vertex_info_, prefix_, validate_level_); + VertexPropertyWriter writer(vertex_info_, prefix_, validate_level_); IdType start_chunk_index = start_vertex_index_ / vertex_info_.GetChunkSize(); // convert to table @@ -257,6 +261,7 @@ class VerticesBuilder { IdType start_vertex_index_; IdType num_vertices_; bool is_saved_; + ValidateLevel validate_level_; }; } // namespace builder diff --git a/cpp/src/edges_builder.cc b/cpp/src/edges_builder.cc index 2a9b46742..d5eff84f7 100644 --- a/cpp/src/edges_builder.cc +++ b/cpp/src/edges_builder.cc @@ -20,6 +20,91 @@ limitations under the License. 
namespace GAR_NAMESPACE_INTERNAL { namespace builder { +Status EdgesBuilder::Validate(const Edge& e, + ValidateLevel validate_level) const { + // use the builder's validate level + if (validate_level == ValidateLevel::default_validate) + validate_level = validate_level_; + // no validate + if (validate_level == ValidateLevel::no_validate) + return Status::OK(); + + // weak validate + // can not add new edges after dumping + if (is_saved_) { + return Status::InvalidOperation("can not add new edges after dumping"); + } + // invalid adj list type + if (!edge_info_.ContainAdjList(adj_list_type_)) { + return Status::InvalidOperation( + "the adj list type " + + std::string(AdjListTypeToString(adj_list_type_)) + + " does not exist in the edge info"); + } + + // strong validate + if (validate_level == ValidateLevel::strong_validate) { + for (auto& property : e.GetProperties()) { + // check if the property is contained + if (!edge_info_.ContainProperty(property.first)) + return Status::InvalidOperation( + "invalid property name: " + property.first + + ", which is not contained in the edge info"); + // check if the property type is correct + auto type = edge_info_.GetPropertyType(property.first).value(); + bool invalid_type = false; + switch (type.id()) { + case Type::BOOL: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::BOOL>::CType)) { + invalid_type = true; + } + break; + case Type::INT32: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::INT32>::CType)) { + invalid_type = true; + } + break; + case Type::INT64: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::INT64>::CType)) { + invalid_type = true; + } + break; + case Type::FLOAT: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::FLOAT>::CType)) { + invalid_type = true; + } + break; + case Type::DOUBLE: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::DOUBLE>::CType)) { + invalid_type = true; + } + break; + case Type::STRING: + if (property.second.type() 
!= + typeid(typename ConvertToArrowType<Type::STRING>::CType)) { + invalid_type = true; + } + break; + default: + return Status::TypeError("unsupported property type"); + } + if (invalid_type) { + std::string err_msg = + "invalid data type for property: " + property.first + + ", defined as " + type.ToTypeName() + ", but got " + + property.second.type().name(); + return Status::TypeError(err_msg); + } + } + } + return Status::OK(); +} + Status EdgesBuilder::appendToArray( const DataType& type, const std::string& property_name, std::shared_ptr<arrow::Array>& array, // NOLINT diff --git a/cpp/src/vertices_builder.cc b/cpp/src/vertices_builder.cc index 2746c26e9..74408116b 100644 --- a/cpp/src/vertices_builder.cc +++ b/cpp/src/vertices_builder.cc @@ -19,6 +19,94 @@ limitations under the License. namespace GAR_NAMESPACE_INTERNAL { namespace builder { +Status VerticesBuilder::Validate(const Vertex& v, IdType index, + ValidateLevel validate_level) const { + // use the builder's validate level + if (validate_level == ValidateLevel::default_validate) + validate_level = validate_level_; + // no validate + if (validate_level == ValidateLevel::no_validate) + return Status::OK(); + + // weak validate + // can not add new vertices after dumping + if (is_saved_) { + return Status::InvalidOperation("can not add new vertices after dumping"); + } + // the start vertex index must be aligned with the chunk size + if (start_vertex_index_ % vertex_info_.GetChunkSize() != 0) { + return Status::InvalidOperation( + "the start vertex index must be aligned " + "with the chunk size"); + } + // the vertex index must not be smaller than the start index + if (index != -1 && index < start_vertex_index_) + return Status::InvalidOperation( + "the vertex index must not be smaller than the start index"); + + // strong validate + if (validate_level == ValidateLevel::strong_validate) { + for (auto& property : v.GetProperties()) { + // check if the property is contained + if (!vertex_info_.ContainProperty(property.first)) + return 
Status::InvalidOperation( + "invalid property name: " + property.first + + ", which is not contained in the vertex info"); + // check if the property type is correct + auto type = vertex_info_.GetPropertyType(property.first).value(); + bool invalid_type = false; + switch (type.id()) { + case Type::BOOL: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::BOOL>::CType)) { + invalid_type = true; + } + break; + case Type::INT32: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::INT32>::CType)) { + invalid_type = true; + } + break; + case Type::INT64: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::INT64>::CType)) { + invalid_type = true; + } + break; + case Type::FLOAT: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::FLOAT>::CType)) { + invalid_type = true; + } + break; + case Type::DOUBLE: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::DOUBLE>::CType)) { + invalid_type = true; + } + break; + case Type::STRING: + if (property.second.type() != + typeid(typename ConvertToArrowType<Type::STRING>::CType)) { + invalid_type = true; + } + break; + default: + return Status::TypeError("unsupported property type"); + } + if (invalid_type) { + std::string err_msg = + "invalid data type for property: " + property.first + + ", defined as " + type.ToTypeName() + ", but got " + + property.second.type().name(); + return Status::TypeError(err_msg); + } + } + } + return Status::OK(); +} + Status VerticesBuilder::appendToArray( const DataType& type, const std::string& property_name, std::shared_ptr<arrow::Array>& array) { // NOLINT diff --git a/cpp/test/test_builder.cc b/cpp/test/test_builder.cc index ac33e83e7..b61973be2 100644 --- a/cpp/test/test_builder.cc +++ b/cpp/test/test_builder.cc @@ -38,9 +38,11 @@ limitations under the License. 
#include <catch2/catch.hpp> TEST_CASE("test_vertices_builder") { + std::cout << "Test vertex builder" << std::endl; std::string root; REQUIRE(GetTestResourceRoot(&root).ok()); + // construct vertex builder std::string vertex_meta_file = root + "/ldbc_sample/parquet/" + "person.vertex.yml"; auto vertex_meta = GAR_NAMESPACE::Yaml::LoadFile(vertex_meta_file).value(); @@ -49,6 +51,28 @@ TEST_CASE("test_vertices_builder") { GAR_NAMESPACE::builder::VerticesBuilder builder(vertex_info, "/tmp/", start_index); + // set validate level + REQUIRE(builder.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::no_validate); + builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); + REQUIRE(builder.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::strong_validate); + + // check different validate levels + GAR_NAMESPACE::builder::Vertex v; + v.AddProperty("id", "id_of_string"); + REQUIRE( + builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); + REQUIRE( + builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); + REQUIRE(builder.Validate(v, -2, GAR_NAMESPACE::ValidateLevel::weak_validate) + .IsInvalidOperation()); + REQUIRE(builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::strong_validate) + .IsTypeError()); + v.AddProperty("invalid_name", "invalid_value"); + REQUIRE(builder.Validate(v, 0).IsInvalidOperation()); + + // add vertices std::ifstream fp(root + "/ldbc_sample/person_0_0.csv"); std::string line; getline(fp, line); @@ -60,7 +84,6 @@ TEST_CASE("test_vertices_builder") { getline(readstr, name, '|'); names.push_back(name); } - int index = 0; while (getline(fp, line)) { std::string val; std::istringstream readstr(line); @@ -76,11 +99,16 @@ TEST_CASE("test_vertices_builder") { v.AddProperty(names[i], val); } } - index++; REQUIRE(builder.AddVertex(v).ok()); } + + // dump REQUIRE(builder.Dump().ok()); + // can not add new vertices after dumping + REQUIRE(builder.AddVertex(v).IsInvalidOperation()); + + // check the number of vertices auto 
fs = arrow::fs::FileSystemFromUriOrPath(root).ValueOrDie(); auto input = fs->OpenInputStream("/tmp/vertex/person/vertex_count").ValueOrDie(); @@ -90,9 +118,11 @@ TEST_CASE("test_vertices_builder") { } TEST_CASE("test_edges_builder") { + std::cout << "Test edge builder" << std::endl; std::string root; REQUIRE(GetTestResourceRoot(&root).ok()); + // construct edge builder std::string edge_meta_file = root + "/ldbc_sample/parquet/" + "person_knows_person.edge.yml"; auto edge_meta = GAR_NAMESPACE::Yaml::LoadFile(edge_meta_file).value(); @@ -100,6 +130,25 @@ TEST_CASE("test_edges_builder") { GAR_NAMESPACE::builder::EdgesBuilder builder( edge_info, "/tmp/", GraphArchive::AdjListType::ordered_by_dest, 903); + // set validate level + REQUIRE(builder.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::no_validate); + builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); + REQUIRE(builder.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::strong_validate); + + // check different validate levels + GAR_NAMESPACE::builder::Edge e(0, 1); + e.AddProperty("creationDate", 2020); + REQUIRE(builder.Validate(e, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); + REQUIRE( + builder.Validate(e, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); + REQUIRE(builder.Validate(e, GAR_NAMESPACE::ValidateLevel::strong_validate) + .IsTypeError()); + e.AddProperty("invalid_name", "invalid_value"); + REQUIRE(builder.Validate(e).IsInvalidOperation()); + + // add edges std::ifstream fp(root + "/ldbc_sample/person_knows_person_0_0.csv"); std::string line; getline(fp, line); @@ -128,6 +177,10 @@ TEST_CASE("test_edges_builder") { } } } - std::cout << "Test edge builder" << std::endl; + + // dump REQUIRE(builder.Dump().ok()); + + // can not add new edges after dumping + REQUIRE(builder.AddEdge(e).IsInvalidOperation()); }