From f645a3c1daf8fc31ed592cef5bddfb9715783665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=89=E7=90=86?= Date: Wed, 31 May 2023 17:37:49 +0800 Subject: [PATCH 1/3] Add validation of different levels for vertices_builder --- cpp/include/gar/writer/vertices_builder.h | 56 ++++++++------- cpp/src/vertices_builder.cc | 88 +++++++++++++++++++++++ 2 files changed, 118 insertions(+), 26 deletions(-) diff --git a/cpp/include/gar/writer/vertices_builder.h b/cpp/include/gar/writer/vertices_builder.h index fa06c4de0..65ec71c15 100644 --- a/cpp/include/gar/writer/vertices_builder.h +++ b/cpp/include/gar/writer/vertices_builder.h @@ -129,46 +129,49 @@ class VerticesBuilder { * @param vertex_info The vertex info that describes the vertex type. * @param prefix The absolute prefix. * @param start_vertex_index The start index of the vertices collection. + * @param validate_level The validate level, with no validate by default. */ - explicit VerticesBuilder(const VertexInfo& vertex_info, - const std::string& prefix, - IdType start_vertex_index = 0) + explicit VerticesBuilder( + const VertexInfo& vertex_info, const std::string& prefix, + IdType start_vertex_index = 0, + const ValidateLevel& validate_level = ValidateLevel::no_validate) : vertex_info_(vertex_info), prefix_(prefix), - start_vertex_index_(start_vertex_index) { + start_vertex_index_(start_vertex_index), + validate_level_(validate_level) { vertices_.clear(); num_vertices_ = 0; is_saved_ = false; } + /** + * @brief Set the validate level. + * + * @param validate_level The validate level to set. + */ + inline void SetValidateLevel(const ValidateLevel& validate_level) { + validate_level_ = validate_level; + } + + /** + * @brief Get the validate level. + * + * @return The validate level of this writer. + */ + inline ValidateLevel GetValidateLevel() const { return validate_level_; } + /** * @brief Check if adding a vertex with the given index is allowed. * * @param v The vertex to add. * @param index The given index, -1 means the next unused index. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or Status::InvalidOperation error. */ - Status Validate(const Vertex& v, IdType index = -1) const { - // can not add new vertices - if (is_saved_) { - return Status::InvalidOperation("can not add new vertices after dumping"); - } - // start vertex index must be aligned with the chunk size - if (start_vertex_index_ % vertex_info_.GetChunkSize() != 0) { - return Status::InvalidOperation("invalid start vertex index"); - } - // vertex index must larger than start index - if (index != -1 && index < start_vertex_index_) - return Status::InvalidOperation( - "vertex index must larger than start index"); - // contain invalid properties - for (auto& property : v.GetProperties()) { - if (!vertex_info_.ContainProperty(property.first)) - return Status::InvalidOperation("invalid property"); - } - return Status::OK(); - } - + Status Validate( + const Vertex& v, IdType index = -1, + ValidateLevel validate_level = ValidateLevel::default_validate) const; /** * @brief Add a vertex with the given index. * @@ -207,7 +210,7 @@ class VerticesBuilder { */ Status Dump() { // construct the writer - VertexPropertyWriter writer(vertex_info_, prefix_); + VertexPropertyWriter writer(vertex_info_, prefix_, validate_level_); IdType start_chunk_index = start_vertex_index_ / vertex_info_.GetChunkSize(); // convert to table @@ -257,6 +260,7 @@ class VerticesBuilder { IdType start_vertex_index_; IdType num_vertices_; bool is_saved_; + ValidateLevel validate_level_; }; } // namespace builder diff --git a/cpp/src/vertices_builder.cc b/cpp/src/vertices_builder.cc index 2746c26e9..3bd988364 100644 --- a/cpp/src/vertices_builder.cc +++ b/cpp/src/vertices_builder.cc @@ -19,6 +19,94 @@ limitations under the License. namespace GAR_NAMESPACE_INTERNAL { namespace builder { +Status VerticesBuilder::Validate(const Vertex& v, IdType index, + ValidateLevel validate_level) const { + // use the builder's validate level + if (validate_level == ValidateLevel::default_validate) + validate_level = validate_level_; + // no validate + if (validate_level == ValidateLevel::no_validate) + return Status::OK(); + + // weak validate + // can not add new vertices after dumping + if (is_saved_) { + return Status::InvalidOperation("can not add new vertices after dumping"); + } + // the start vertex index must be aligned with the chunk size + if (start_vertex_index_ % vertex_info_.GetChunkSize() != 0) { + return Status::InvalidOperation( + "the start vertex index must be aligned " + "with the chunk size"); + } + // the vertex index must larger than start index + if (index != -1 && index < start_vertex_index_) + return Status::InvalidOperation( + "the vertex index must be larger than start index"); + + // strong validate + if (validate_level == ValidateLevel::strong_validate) { + for (auto& property : v.GetProperties()) { + // check if the property is contained + if (!vertex_info_.ContainProperty(property.first)) + return Status::InvalidOperation( + "invalid property name which is not " + "contained in the vertex info"); + // check if the property type is correct + auto type = vertex_info_.GetPropertyType(property.first).value(); + bool invalid_type = false; + switch (type.id()) { + case Type::BOOL: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::INT32: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::INT64: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::FLOAT: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::DOUBLE: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::STRING: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + default: + return Status::TypeError("unsupported property type"); + } + if (invalid_type) { + std::string err_msg = + "invalid data type for property: " + property.first + + ", defined as " + type.ToTypeName() + ", but got " + + property.second.type().name(); + return Status::TypeError(err_msg); + } + } + } + return Status::OK(); +} + Status VerticesBuilder::appendToArray( const DataType& type, const std::string& property_name, std::shared_ptr& array) { // NOLINT From c92eb939deefe0070e2d24a48c2881d14583d58e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=89=E7=90=86?= Date: Wed, 31 May 2023 17:38:23 +0800 Subject: [PATCH 2/3] Test vertices builder --- cpp/include/gar/writer/vertices_builder.h | 1 + cpp/src/vertices_builder.cc | 2 +- cpp/test/test_builder.cc | 32 ++++++++++++++++++++--- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/cpp/include/gar/writer/vertices_builder.h b/cpp/include/gar/writer/vertices_builder.h index 65ec71c15..97bf8d1f9 100644 --- a/cpp/include/gar/writer/vertices_builder.h +++ b/cpp/include/gar/writer/vertices_builder.h @@ -172,6 +172,7 @@ class VerticesBuilder { Status Validate( const Vertex& v, IdType index = -1, ValidateLevel validate_level = ValidateLevel::default_validate) const; + /** * @brief Add a vertex with the given index. * diff --git a/cpp/src/vertices_builder.cc b/cpp/src/vertices_builder.cc index 3bd988364..08726b119 100644 --- a/cpp/src/vertices_builder.cc +++ b/cpp/src/vertices_builder.cc @@ -50,7 +50,7 @@ Status VerticesBuilder::Validate(const Vertex& v, IdType index, // check if the property is contained if (!vertex_info_.ContainProperty(property.first)) return Status::InvalidOperation( - "invalid property name which is not " + "invalid property name: " + property.first + ", which is not " "contained in the vertex info"); // check if the property type is correct auto type = vertex_info_.GetPropertyType(property.first).value(); diff --git a/cpp/test/test_builder.cc b/cpp/test/test_builder.cc index ac33e83e7..0d30411b8 100644 --- a/cpp/test/test_builder.cc +++ b/cpp/test/test_builder.cc @@ -38,9 +38,11 @@ limitations under the License. #include TEST_CASE("test_vertices_builder") { + std::cout << "Test vertex builder" << std::endl; std::string root; REQUIRE(GetTestResourceRoot(&root).ok()); + // construct vertex builder std::string vertex_meta_file = root + "/ldbc_sample/parquet/" + "person.vertex.yml"; auto vertex_meta = GAR_NAMESPACE::Yaml::LoadFile(vertex_meta_file).value(); @@ -49,6 +51,26 @@ TEST_CASE("test_vertices_builder") { GAR_NAMESPACE::builder::VerticesBuilder builder(vertex_info, "/tmp/", start_index); + // set validate level + REQUIRE(builder.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::no_validate); + builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); + REQUIRE(builder.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::strong_validate); + + // check different validate levels + GAR_NAMESPACE::builder::Vertex v; + v.AddProperty("id", "string_id"); + REQUIRE(builder.Validate(v, -1, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); + REQUIRE(builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); + REQUIRE(builder.Validate(v, -2, GAR_NAMESPACE::ValidateLevel::weak_validate).IsInvalidOperation()); + REQUIRE(builder.Validate(v, 0, + GAR_NAMESPACE::ValidateLevel::strong_validate).IsTypeError()); + v.AddProperty("invalid", "invalid"); + REQUIRE(builder.Validate(v, 0).IsInvalidOperation()); + + + // add vertices std::ifstream fp(root + "/ldbc_sample/person_0_0.csv"); std::string line; getline(fp, line); @@ -60,7 +82,6 @@ TEST_CASE("test_vertices_builder") { getline(readstr, name, '|'); names.push_back(name); } - int index = 0; while (getline(fp, line)) { std::string val; std::istringstream readstr(line); @@ -76,11 +97,12 @@ TEST_CASE("test_vertices_builder") { v.AddProperty(names[i], val); } } - index++; REQUIRE(builder.AddVertex(v).ok()); } + // dump REQUIRE(builder.Dump().ok()); + // check the number of vertices auto fs = arrow::fs::FileSystemFromUriOrPath(root).ValueOrDie(); auto input = fs->OpenInputStream("/tmp/vertex/person/vertex_count").ValueOrDie(); @@ -90,9 +112,11 @@ TEST_CASE("test_vertices_builder") { } TEST_CASE("test_edges_builder") { + std::cout << "Test edge builder" << std::endl; std::string root; REQUIRE(GetTestResourceRoot(&root).ok()); + // construct edge builder std::string edge_meta_file = root + "/ldbc_sample/parquet/" + "person_knows_person.edge.yml"; auto edge_meta = GAR_NAMESPACE::Yaml::LoadFile(edge_meta_file).value(); @@ -100,6 +124,7 @@ TEST_CASE("test_edges_builder") { GAR_NAMESPACE::builder::EdgesBuilder builder( edge_info, "/tmp/", GraphArchive::AdjListType::ordered_by_dest, 903); + // add edges std::ifstream fp(root + "/ldbc_sample/person_knows_person_0_0.csv"); std::string line; getline(fp, line); @@ -128,6 +153,7 @@ TEST_CASE("test_edges_builder") { } } } - std::cout << "Test edge builder" << std::endl; + + // dump REQUIRE(builder.Dump().ok()); } From 063e2e1e5c25f336a75138043458add623f976cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=89=E7=90=86?= Date: Wed, 31 May 2023 17:39:12 +0800 Subject: [PATCH 3/3] Add validation of different levels for edges_builder --- cpp/include/gar/writer/edges_builder.h | 50 +++++++++------ cpp/src/edges_builder.cc | 85 ++++++++++++++++++++++++++ cpp/src/vertices_builder.cc | 4 +- cpp/test/test_builder.cc | 43 ++++++++++--- 4 files changed, 152 insertions(+), 30 deletions(-) diff --git a/cpp/include/gar/writer/edges_builder.h b/cpp/include/gar/writer/edges_builder.h index ff20f156d..16eb06095 100644 --- a/cpp/include/gar/writer/edges_builder.h +++ b/cpp/include/gar/writer/edges_builder.h @@ -147,13 +147,17 @@ class EdgesBuilder { * @param prefix The absolute prefix. * @param adj_list_type The adj list type of the edges. * @param num_vertices The total number of vertices for source or destination. + * @param validate_level The validate level, with no validate by default. */ - explicit EdgesBuilder(const EdgeInfo edge_info, const std::string& prefix, - AdjListType adj_list_type, IdType num_vertices) + explicit EdgesBuilder( + const EdgeInfo edge_info, const std::string& prefix, + AdjListType adj_list_type, IdType num_vertices, + const ValidateLevel& validate_level = ValidateLevel::no_validate) : edge_info_(edge_info), prefix_(prefix), adj_list_type_(adj_list_type), - num_vertices_(num_vertices) { + num_vertices_(num_vertices), + validate_level_(validate_level) { edges_.clear(); num_edges_ = 0; is_saved_ = false; @@ -175,28 +179,32 @@ class EdgesBuilder { } } + /** + * @brief Set the validate level. + * + * @param validate_level The validate level to set. + */ + inline void SetValidateLevel(const ValidateLevel& validate_level) { + validate_level_ = validate_level; + } + + /** + * @brief Get the validate level. + * + * @return The validate level of this writer. + */ + inline ValidateLevel GetValidateLevel() const { return validate_level_; } + /** * @brief Check if adding an edge is allowed. * * @param e The edge to add. + * @param validate_level The validate level for this operation, + * which is the writer's validate level by default. * @return Status: ok or status::InvalidOperation error. */ - Status Validate(const Edge& e) { - // can not add new edges - if (is_saved_) { - return Status::InvalidOperation("can not add new edges after dumping"); - } - // invalid adj list type - if (!edge_info_.ContainAdjList(adj_list_type_)) { - return Status::InvalidOperation("invalid adj list type"); - } - // contain invalid properties - for (auto& property : e.GetProperties()) { - if (!edge_info_.ContainProperty(property.first)) - return Status::InvalidOperation("invalid property"); - } - return Status::OK(); - } + Status Validate(const Edge& e, ValidateLevel validate_level = + ValidateLevel::default_validate) const; /** * @brief Get the vertex chunk index of a given edge. @@ -249,7 +257,8 @@ class EdgesBuilder { */ Status Dump() { // construct the writer - EdgeChunkWriter writer(edge_info_, prefix_, adj_list_type_); + EdgeChunkWriter writer(edge_info_, prefix_, adj_list_type_, + validate_level_); // construct empty edge collections for vertex chunks without edges IdType num_vertex_chunks = (num_vertices_ + vertex_chunk_size_ - 1) / vertex_chunk_size_; @@ -372,6 +381,7 @@ class EdgesBuilder { IdType num_vertices_; IdType num_edges_; bool is_saved_; + ValidateLevel validate_level_; }; } // namespace builder diff --git a/cpp/src/edges_builder.cc b/cpp/src/edges_builder.cc index 2a9b46742..d5eff84f7 100644 --- a/cpp/src/edges_builder.cc +++ b/cpp/src/edges_builder.cc @@ -20,6 +20,91 @@ limitations under the License. namespace GAR_NAMESPACE_INTERNAL { namespace builder { +Status EdgesBuilder::Validate(const Edge& e, + ValidateLevel validate_level) const { + // use the builder's validate level + if (validate_level == ValidateLevel::default_validate) + validate_level = validate_level_; + // no validate + if (validate_level == ValidateLevel::no_validate) + return Status::OK(); + + // weak validate + // can not add new edges after dumping + if (is_saved_) { + return Status::InvalidOperation("can not add new edges after dumping"); + } + // invalid adj list type + if (!edge_info_.ContainAdjList(adj_list_type_)) { + return Status::InvalidOperation( + "the adj list type " + + std::string(AdjListTypeToString(adj_list_type_)) + + " does not exist in the edge info"); + } + + // strong validate + if (validate_level == ValidateLevel::strong_validate) { + for (auto& property : e.GetProperties()) { + // check if the property is contained + if (!edge_info_.ContainProperty(property.first)) + return Status::InvalidOperation( + "invalid property name: " + property.first + + ", which is not contained in the vertex info"); + // check if the property type is correct + auto type = edge_info_.GetPropertyType(property.first).value(); + bool invalid_type = false; + switch (type.id()) { + case Type::BOOL: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::INT32: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::INT64: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::FLOAT: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::DOUBLE: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + case Type::STRING: + if (property.second.type() != + typeid(typename ConvertToArrowType::CType)) { + invalid_type = true; + } + break; + default: + return Status::TypeError("unsupported property type"); + } + if (invalid_type) { + std::string err_msg = + "invalid data type for property: " + property.first + + ", defined as " + type.ToTypeName() + ", but got " + + property.second.type().name(); + return Status::TypeError(err_msg); + } + } + } + return Status::OK(); +} + Status EdgesBuilder::appendToArray( const DataType& type, const std::string& property_name, std::shared_ptr& array, // NOLINT diff --git a/cpp/src/vertices_builder.cc b/cpp/src/vertices_builder.cc index 08726b119..74408116b 100644 --- a/cpp/src/vertices_builder.cc +++ b/cpp/src/vertices_builder.cc @@ -50,8 +50,8 @@ Status VerticesBuilder::Validate(const Vertex& v, IdType index, // check if the property is contained if (!vertex_info_.ContainProperty(property.first)) return Status::InvalidOperation( - "invalid property name: " + property.first + ", which is not " - "contained in the vertex info"); + "invalid property name: " + property.first + + ", which is not contained in the vertex info"); // check if the property type is correct auto type = vertex_info_.GetPropertyType(property.first).value(); bool invalid_type = false; diff --git a/cpp/test/test_builder.cc b/cpp/test/test_builder.cc index 0d30411b8..b61973be2 100644 --- a/cpp/test/test_builder.cc +++ b/cpp/test/test_builder.cc @@ -60,16 +60,18 @@ TEST_CASE("test_vertices_builder") { // check different validate levels GAR_NAMESPACE::builder::Vertex v; - v.AddProperty("id", "string_id"); - REQUIRE(builder.Validate(v, -1, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); - REQUIRE(builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); - REQUIRE(builder.Validate(v, -2, GAR_NAMESPACE::ValidateLevel::weak_validate).IsInvalidOperation()); - REQUIRE(builder.Validate(v, 0, - GAR_NAMESPACE::ValidateLevel::strong_validate).IsTypeError()); - v.AddProperty("invalid", "invalid"); + v.AddProperty("id", "id_of_string"); + REQUIRE( + builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); + REQUIRE( + builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); + REQUIRE(builder.Validate(v, -2, GAR_NAMESPACE::ValidateLevel::weak_validate) + .IsInvalidOperation()); + REQUIRE(builder.Validate(v, 0, GAR_NAMESPACE::ValidateLevel::strong_validate) + .IsTypeError()); + v.AddProperty("invalid_name", "invalid_value"); REQUIRE(builder.Validate(v, 0).IsInvalidOperation()); - // add vertices std::ifstream fp(root + "/ldbc_sample/person_0_0.csv"); std::string line; @@ -99,9 +101,13 @@ TEST_CASE("test_vertices_builder") { } REQUIRE(builder.AddVertex(v).ok()); } + // dump REQUIRE(builder.Dump().ok()); + // can not add new vertices after dumping + REQUIRE(builder.AddVertex(v).IsInvalidOperation()); + // check the number of vertices auto fs = arrow::fs::FileSystemFromUriOrPath(root).ValueOrDie(); auto input = @@ -124,6 +130,24 @@ TEST_CASE("test_edges_builder") { GAR_NAMESPACE::builder::EdgesBuilder builder( edge_info, "/tmp/", GraphArchive::AdjListType::ordered_by_dest, 903); + // set validate level + REQUIRE(builder.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::no_validate); + builder.SetValidateLevel(GAR_NAMESPACE::ValidateLevel::strong_validate); + REQUIRE(builder.GetValidateLevel() == + GAR_NAMESPACE::ValidateLevel::strong_validate); + + // check different validate levels + GAR_NAMESPACE::builder::Edge e(0, 1); + e.AddProperty("creationDate", 2020); + REQUIRE(builder.Validate(e, GAR_NAMESPACE::ValidateLevel::no_validate).ok()); + REQUIRE( + builder.Validate(e, GAR_NAMESPACE::ValidateLevel::weak_validate).ok()); + REQUIRE(builder.Validate(e, GAR_NAMESPACE::ValidateLevel::strong_validate) + .IsTypeError()); + e.AddProperty("invalid_name", "invalid_value"); + REQUIRE(builder.Validate(e).IsInvalidOperation()); + // add edges std::ifstream fp(root + "/ldbc_sample/person_knows_person_0_0.csv"); std::string line; @@ -156,4 +180,7 @@ TEST_CASE("test_edges_builder") { // dump REQUIRE(builder.Dump().ok()); + + // can not add new edges after dumping + REQUIRE(builder.AddEdge(e).IsInvalidOperation()); }