From e065f6f637a6adc202bda32d1d504016f5d77d6d Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Fri, 3 Nov 2023 15:33:18 +0800 Subject: [PATCH 1/6] fix edge expandv --- .github/workflows/hqps-db-ci.yml | 2 +- .../src/hqps/hqps_path_expand_builder.h | 37 +- .../hqps_db/core/operator/edge_expand.h | 25 +- .../hqps_db/core/operator/path_expand.h | 452 ++++++++++- flex/engines/hqps_db/core/operator/project.h | 5 +- flex/engines/hqps_db/core/operator/scan.h | 10 +- flex/engines/hqps_db/core/operator/sink.h | 12 +- flex/engines/hqps_db/core/params.h | 52 +- flex/engines/hqps_db/core/sync_engine.h | 44 +- flex/engines/hqps_db/core/utils/keyed.h | 10 +- flex/engines/hqps_db/core/utils/props.h | 12 +- flex/engines/hqps_db/database/adj_list.h | 1 + .../hqps_db/database/mutable_csr_interface.h | 57 +- .../structures/multi_edge_set/flat_edge_set.h | 7 +- .../multi_edge_set/untyped_edge_set.h | 9 +- .../multi_vertex_set/general_vertex_set.h | 738 ++++++++++++++++-- .../multi_vertex_set/keyed_row_vertex_set.h | 4 +- .../multi_vertex_set/row_vertex_set.h | 4 +- .../multi_vertex_set/two_label_vertex_set.h | 6 +- .../queries/ic/adhoc/simple_match_11.cypher | 1 + flex/tests/hqps/match_query.h | 41 + flex/tests/hqps/query_test.cc | 13 + .../suite/simple/SimpleMatchQueries.java | 12 + .../integration/ldbc/SimpleMatchTest.java | 8 + 24 files changed, 1415 insertions(+), 147 deletions(-) create mode 100644 flex/resources/queries/ic/adhoc/simple_match_11.cypher diff --git a/.github/workflows/hqps-db-ci.yml b/.github/workflows/hqps-db-ci.yml index 47a596dfd2c5..91f128bcc33d 100644 --- a/.github/workflows/hqps-db-ci.yml +++ b/.github/workflows/hqps-db-ci.yml @@ -122,7 +122,7 @@ jobs: eval ${cmd} done - for i in 1 2 3 4 5 6 7 8 9 10; + for i in 1 2 3 4 5 6 7 8 9 10 11; do cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/simple_match_${i}.cypher -w=/tmp/codgen/" cmd=${cmd}" -o=/tmp/plugin --ir_conf=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml " diff --git a/flex/codegen/src/hqps/hqps_path_expand_builder.h b/flex/codegen/src/hqps/hqps_path_expand_builder.h index d05ea4d862af..04a26f8d8104 100644 --- a/flex/codegen/src/hqps/hqps_path_expand_builder.h +++ b/flex/codegen/src/hqps/hqps_path_expand_builder.h @@ -117,13 +117,14 @@ class PathExpandOpBuilder { throw std::runtime_error("Expect edge graph type"); } auto& edge_type = act_graph_type.graph_data_type(); - if (edge_type.size() != 1) { - throw std::runtime_error("Expect only one edge type"); + CHECK(edge_type.size() >= 1) << "Expect at least one edge type"; + std::vector src_labels, dst_labels; + for (auto i = 0; i < edge_type.size(); ++i) { + auto& edge_type_i = edge_type[i]; + auto& edge_labels_i = edge_type_i.label(); + src_labels.push_back(edge_labels_i.src_label().value()); + dst_labels.push_back(edge_labels_i.dst_label().value()); } - auto& edge_type0 = edge_type[0]; - auto& edge_labels = edge_type0.label(); - auto src_label = edge_labels.src_label().value(); - auto dst_label = edge_labels.dst_label().value(); // if find edge triplets, we clear current VLOG(10) << "Clear current dst labels:" @@ -131,12 +132,28 @@ class PathExpandOpBuilder { dst_vertex_labels_.clear(); if (direction_ == internal::Direction::kBoth) { - CHECK(src_label == dst_label); - dst_vertex_labels_.emplace_back(src_label); + // if direction is both, we need to check src_label == dst_label + // dedup src_labels + std::sort(src_labels.begin(), src_labels.end()); + src_labels.erase(std::unique(src_labels.begin(), src_labels.end()), + src_labels.end()); + // dedup dst_labels + std::sort(dst_labels.begin(), dst_labels.end()); + dst_labels.erase(std::unique(dst_labels.begin(), dst_labels.end()), + dst_labels.end()); + CHECK(src_labels.size() == dst_labels.size()); + for (auto i = 0; i < src_labels.size(); ++i) { + CHECK(src_labels[i] == dst_labels[i]); + dst_vertex_labels_.emplace_back(dst_labels[i]); + } } else if (direction_ == internal::Direction::kOut) { - dst_vertex_labels_.emplace_back(dst_label); + for (auto i = 0; i < dst_labels.size(); ++i) { + dst_vertex_labels_.emplace_back(dst_labels[i]); + } } else if (direction_ == internal::Direction::kIn) { - dst_vertex_labels_.emplace_back(src_label); + for (auto i = 0; i < src_labels.size(); ++i) { + dst_vertex_labels_.emplace_back(src_labels[i]); + } } else { throw std::runtime_error("Unknown direction"); } diff --git a/flex/engines/hqps_db/core/operator/edge_expand.h b/flex/engines/hqps_db/core/operator/edge_expand.h index fdd7d147207d..c22428519d10 100644 --- a/flex/engines/hqps_db/core/operator/edge_expand.h +++ b/flex/engines/hqps_db/core/operator/edge_expand.h @@ -309,11 +309,11 @@ class EdgeExpand { /// @param v_sets /// @param edge_expand_opt /// @return - template >> static RES_T EdgeExpandV( const GRAPH_INTERFACE& graph, - const GeneralVertexSet& cur_vertex_set, + const GeneralVertexSet& cur_vertex_set, Direction direction, label_id_t edge_label, label_id_t other_label, Filter&& edge_filter, size_t limit = INT_MAX) { @@ -440,11 +440,12 @@ class EdgeExpand { /// @param v_sets /// @param edge_expand_opt /// @return - template < - typename VERTEX_SET_T, size_t num_labels, typename EDGE_FILTER_T, - size_t... Is, typename std::enable_if<(num_labels != 2)>::type* = nullptr, - typename RES_T = std::pair, - std::vector>> + template ::type* = nullptr, + typename RES_T = + std::pair, + std::vector>> static RES_T EdgeExpandV(const GRAPH_INTERFACE& graph, const VERTEX_SET_T& cur_vertex_set, Direction direction, label_id_t edge_label, @@ -499,7 +500,7 @@ class EdgeExpand { } CHECK(cur_ind == total_size); auto copied_labels(other_labels); - GeneralVertexSet res_set( + GeneralVertexSet res_set( std::move(res_vids), std::move(copied_labels), std::move(res_bitset)); return std::make_pair(std::move(res_set), std::move(res_offset)); @@ -602,10 +603,10 @@ class EdgeExpand { /// @param edge_filter /// @param limit /// @return / - template + template static auto EdgeExpandVMultiTriplet( const GRAPH_INTERFACE& graph, - const GeneralVertexSet& cur_vertex_set, + const GeneralVertexSet& cur_vertex_set, Direction& direction, std::array, num_pairs>& edge_labels, std::tuple...>& prop_names, @@ -833,10 +834,10 @@ class EdgeExpand { /// @param edge_filter /// @param limit /// @return / - template + template static auto EdgeExpandEMultiTriplet( const GRAPH_INTERFACE& graph, - const GeneralVertexSet& cur_vertex_set, + const GeneralVertexSet& cur_vertex_set, Direction& direction, std::array, num_pairs>& edge_labels, std::tuple...>& prop_names, diff --git a/flex/engines/hqps_db/core/operator/path_expand.h b/flex/engines/hqps_db/core/operator/path_expand.h index 683f723392af..3ea15daf4428 100644 --- a/flex/engines/hqps_db/core/operator/path_expand.h +++ b/flex/engines/hqps_db/core/operator/path_expand.h @@ -180,7 +180,7 @@ class PathExpand { return std::make_pair(std::move(row_vertex_set), std::move(res_offsets)); } - // PathExpandV for row vertex set as input. + // PathExpandV for row vertex set as input, retrieve no properties. template , typename RES_T = std::pair>> @@ -207,6 +207,76 @@ class PathExpand { std::move(std::get<2>(tuple))); } + // PathExpandV with multiple dst labels, for row vertex set as input, output + // no properties. + template < + typename... V_SET_T, typename VERTEX_FILTER_T, typename LabelT, + size_t num_labels, typename EDGE_FILTER_T, size_t get_v_num_labels, + typename RES_SET_T = GeneralVertexSet, + typename RES_T = std::pair>> + static RES_T PathExpandV( + const GRAPH_INTERFACE& graph, + const RowVertexSet& vertex_set, + PathExpandVMultiDstOpt&& + path_expand_opt) { + // + auto cur_label = vertex_set.GetLabel(); + auto& range = path_expand_opt.range_; + auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; + auto& get_v_opt = path_expand_opt.get_v_opt_; + return PathExpandMultiDstV(graph, cur_label, vertex_set.GetVertices(), + range, edge_expand_opt, get_v_opt); + } + + // PathExpandV with multiple dst labels, for twoLabelSet vertex set as input, + // output no properties. + template < + typename... V_SET_T, typename VERTEX_FILTER_T, typename LabelT, + size_t num_labels, typename EDGE_FILTER_T, size_t get_v_num_labels, + typename RES_SET_T = GeneralVertexSet, + typename RES_T = std::pair>> + static RES_T PathExpandV( + const GRAPH_INTERFACE& graph, + const TwoLabelVertexSet& vertex_set, + PathExpandVMultiDstOpt&& + path_expand_opt) { + auto& range = path_expand_opt.range_; + auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; + auto& get_v_opt = path_expand_opt.get_v_opt_; + auto& bitset = vertex_set.GetBitset(); + auto src_label_vec = vertex_set.GetLabelVec(); + auto src_label_id_vec = label_key_vec_2_label_id_vec(src_label_vec); + return PathExpandMultiDstVFromTwoLabelSet(graph, vertex_set.GetVertices(), + src_label_id_vec, bitset, range, + edge_expand_opt, get_v_opt); + } + + // PathExpandV with multiple dst labels, for general vertex set as input, + // output no properties. + template < + typename... V_SET_T, typename VERTEX_FILTER_T, typename LabelT, + size_t num_labels, typename EDGE_FILTER_T, size_t get_v_num_labels, + typename RES_SET_T = GeneralVertexSet, + typename RES_T = std::pair>> + static RES_T PathExpandV( + const GRAPH_INTERFACE& graph, + const GeneralVertexSet& vertex_set, + PathExpandVMultiDstOpt&& + path_expand_opt) { + auto& range = path_expand_opt.range_; + auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; + auto& get_v_opt = path_expand_opt.get_v_opt_; + auto& bitsets = vertex_set.GetBitsets(); + auto src_label_vec = vertex_set.GetLabelVec(); + auto src_label_id_vec = label_key_vec_2_label_id_vec(src_label_vec); + return PathExpandMultiDstVFromGeneralSet(graph, vertex_set.GetVertices(), + src_label_id_vec, bitsets, range, + edge_expand_opt, get_v_opt); + } + template static std::tuple, std::vector, std::vector> @@ -301,7 +371,6 @@ class PathExpand { } std::vector> gids; std::vector> offsets; - std::unordered_set visited_vertices; gids.resize(range.limit_); offsets.resize(range.limit_); @@ -317,7 +386,6 @@ class PathExpand { offsets[0].emplace_back(i); } offsets[0].emplace_back(src_vertices_size); - visited_vertices.insert(src_vertices_vec.begin(), src_vertices_vec.end()); double visit_array_time = 0.0; for (auto cur_hop = 1; cur_hop < range.limit_; ++cur_hop) { @@ -378,6 +446,137 @@ class PathExpand { } private: + // Expand V from single label vertices, only take vertices. + // Collect multiple dst label vertices. + template + static auto PathExpandMultiDstV( + const GRAPH_INTERFACE& graph, label_id_t src_label, + const std::vector& vertices_vec, const Range& range, + const EdgeExpandOptMultiLabel& + edge_opt, + const GetVOpt& get_vopt) { + // We suppose VERTEX_FILTER is true + auto& edge_other_labels = edge_opt.other_labels_; + auto& vertex_other_labels = get_vopt.v_labels_; + auto edge_other_labels_vec = array_to_vec(edge_other_labels); + auto vertex_other_labels_vec = array_to_vec(vertex_other_labels); + auto edge_label = edge_opt.edge_label_; + std::vector res_vertices; + std::vector res_offsets; + std::vector res_dists; + std::vector res_labels_vec; + std::vector src_labels_vec(vertices_vec.size(), src_label); + std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = + path_expandv_from_src_label_with_multi_dst( + graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, + edge_opt.direction_, vertices_vec, src_labels_vec, range); + auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); + CHECK(res_offsets.size() == vertices_vec.size() + 1); + CHECK(res_vertices.size() == res_labels_vec.size()); + std::vector res_bitsets; + std::vector label_id_vec; + std::tie(res_bitsets, label_id_vec) = + convert_label_id_vec_to_bitsets(res_labels_vec); + CHECK(label_id_vec.size() <= vertex_other_labels.size()) + << "label_id_vec.size(): " << label_id_vec.size() + << ", vertex_other_labels.size(): " << vertex_other_labels.size(); + CHECK(res_bitsets.size() == label_id_vec.size()) + << "res_bitsets.size(): " << res_bitsets.size() + << ", label_id_vec.size(): " << label_id_vec.size(); + auto set = make_general_set( + std::move(res_vertices), std::move(res_dist_tuple), {"dist"}, + std::move(label_id_vec), std::move(res_bitsets)); + return std::make_pair(std::move(set), std::move(res_offsets)); + } + + // Expand V from two label vertices. + // Collect multiple dst label vertices. + template + static auto PathExpandMultiDstVFromTwoLabelSet( + const GRAPH_INTERFACE& graph, + const std::vector& vertices_vec, + const std::vector& src_labels_vec, + const grape::Bitset& bitset, const Range& range, + const EdgeExpandOptMultiLabel& + edge_opt, + const GetVOpt& get_vopt) { + auto& edge_other_labels = edge_opt.other_labels_; + auto& vertex_other_labels = get_vopt.v_labels_; + auto edge_other_labels_vec = array_to_vec(edge_other_labels); + auto vertex_other_labels_vec = array_to_vec(vertex_other_labels); + auto edge_label = edge_opt.edge_label_; + std::vector res_vertices; + std::vector res_offsets; + std::vector res_dists; + std::vector res_labels_vec; + std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = + path_expandv_from_src_label_with_multi_dst( + graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, + edge_opt.direction_, vertices_vec, src_labels_vec, range); + auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); + CHECK(res_offsets.size() == vertices_vec.size() + 1); + CHECK(res_vertices.size() == res_labels_vec.size()); + std::vector res_bitsets; + std::vector label_id_vec; + std::tie(res_bitsets, label_id_vec) = + convert_label_id_vec_to_bitsets(res_labels_vec); + CHECK(label_id_vec.size() <= vertex_other_labels.size()) + << "label_id_vec.size(): " << label_id_vec.size() + << ", vertex_other_labels.size(): " << vertex_other_labels.size(); + CHECK(res_bitsets.size() == label_id_vec.size()) + << "res_bitsets.size(): " << res_bitsets.size() + << ", label_id_vec.size(): " << label_id_vec.size(); + auto set = make_general_set( + std::move(res_vertices), std::move(res_dist_tuple), {"dist"}, + std::move(label_id_vec), std::move(res_bitsets)); + return std::make_pair(std::move(set), std::move(res_offsets)); + } + + // PathExpandMultiDstVFromGeneralSet + template + static auto PathExpandMultiDstVFromGeneralSet( + const GRAPH_INTERFACE& graph, + const std::vector& vertices_vec, + const std::vector& src_labels_vec, + const std::vector& bitsets, const Range& range, + const EdgeExpandOptMultiLabel& + edge_opt, + const GetVOpt& get_vopt) { + auto& edge_other_labels = edge_opt.other_labels_; + auto& vertex_other_labels = get_vopt.v_labels_; + auto edge_other_labels_vec = array_to_vec(edge_other_labels); + auto vertex_other_labels_vec = array_to_vec(vertex_other_labels); + auto edge_label = edge_opt.edge_label_; + std::vector res_vertices; + std::vector res_offsets; + std::vector res_dists; + std::vector res_labels_vec; + std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = + path_expandv_from_src_label_with_multi_dst( + graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, + edge_opt.direction_, vertices_vec, src_labels_vec, range); + auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); + CHECK(res_offsets.size() == vertices_vec.size() + 1); + CHECK(res_vertices.size() == res_labels_vec.size()); + std::vector res_bitsets; + std::vector label_id_vec; + std::tie(res_bitsets, label_id_vec) = + convert_label_id_vec_to_bitsets(res_labels_vec); + CHECK(label_id_vec.size() <= vertex_other_labels.size()) + << "label_id_vec.size(): " << label_id_vec.size() + << ", vertex_other_labels.size(): " << vertex_other_labels.size(); + CHECK(res_bitsets.size() == label_id_vec.size()) + << "res_bitsets.size(): " << res_bitsets.size() + << ", label_id_vec.size(): " << label_id_vec.size(); + auto set = make_general_set( + std::move(res_vertices), std::move(res_dist_tuple), {"dist"}, + std::move(label_id_vec), std::move(res_bitsets)); + return std::make_pair(std::move(set), std::move(res_offsets)); + } + // Expand Path from single label vertices, only take vertices. template @@ -465,6 +664,228 @@ class PathExpand { return std::make_pair(std::move(path_set), std::move(ctx_offsets)); } + // expand from single src label to multiple dst labels. + static auto path_expandv_from_src_label_with_multi_dst( + const GRAPH_INTERFACE& graph, label_id_t edge_label, + const std::vector& other_labels, + const std::vector& + vertex_other_labels, // vertex_other_labels is used to filter + // vertices with other labels. + const Direction& direction, const std::vector& vertices_vec, + const std::vector& src_v_labels_vec, const Range& range) { + // (range, other_label_ind, vertices) + std::vector> other_vertices; + std::vector> other_labels_vec; + std::vector> other_offsets; + other_vertices.resize(range.limit_); + other_offsets.resize(range.limit_); + other_labels_vec.resize(range.limit_); + for (auto i = 0; i < range.limit_; ++i) { + other_offsets[i].reserve(vertices_vec.size() + 1); + } + other_vertices[0].insert(other_vertices[0].end(), vertices_vec.begin(), + vertices_vec.end()); + other_labels_vec[0] = src_v_labels_vec; + for (auto i = 0; i < vertices_vec.size(); ++i) { + other_offsets[0].emplace_back(i); + } + other_offsets[0].emplace_back(vertices_vec.size()); + // the input vertices can have many labels. Should be the union of + // src_vertex_labels and other_labels. + std::vector src_label_candidates; + { + src_label_candidates.insert(src_label_candidates.end(), + src_v_labels_vec.begin(), + src_v_labels_vec.end()); + src_label_candidates.insert(src_label_candidates.end(), + other_labels.begin(), other_labels.end()); + std::sort(src_label_candidates.begin(), src_label_candidates.end()); + // dedup + auto last = + std::unique(src_label_candidates.begin(), src_label_candidates.end()); + src_label_candidates.erase(last, src_label_candidates.end()); + } + // iterate for all hops + for (auto cur_hop = 1; cur_hop < range.limit_; ++cur_hop) { + using nbr_list_type = + std::pair, label_id_t>; + std::vector> nbr_lists; + nbr_lists.resize(other_vertices[cur_hop - 1].size()); + std::vector indicator(other_vertices[cur_hop - 1].size(), false); + // std::vector other_vertex_labels; + // other_vertex_labels.resize(other_vertices[cur_hop - 1].size()); + for (auto& src_other_label : src_label_candidates) { + std::vector indices; + + std::vector other_vertices_for_cur_label; + std::tie(other_vertices_for_cur_label, indices) = + get_vertices_with_label(other_vertices[cur_hop - 1], + other_labels_vec[cur_hop - 1], + src_other_label); + if (indices.size() > 0) { + VLOG(10) << "Get vertices with label: " + << std::to_string(src_other_label) << ", " + << other_vertices_for_cur_label.size(); + + for (auto other_label_ind = 0; other_label_ind < other_labels.size(); + ++other_label_ind) { + auto other_label = other_labels[other_label_ind]; + label_id_t real_src_label, real_dst_label; + if (direction == Direction::Out) { + real_src_label = src_other_label; + real_dst_label = other_label; + } else { + // in or both. + real_src_label = other_label; + real_dst_label = src_other_label; + } + auto cur_nbr_list = + graph.GetOtherVertices(real_src_label, real_dst_label, + edge_label, other_vertices_for_cur_label, + gs::to_string(direction), INT_MAX); + { + size_t tmp_sum = 0; + for (auto i = 0; i < cur_nbr_list.size(); ++i) { + tmp_sum += cur_nbr_list.get_vector(i).size(); + } + VLOG(10) << "Get other vertices: " << cur_nbr_list.size() + << ", nbr size: " << tmp_sum + << ", from: " << std::to_string(real_src_label) + << ", to: " << std::to_string(real_dst_label) + << ", edge_label: " << std::to_string(edge_label) + << ", direction: " << gs::to_string(direction); + } + + for (auto i = 0; i < indices.size(); ++i) { + auto index = indices[i]; + nbr_lists[index].emplace_back(cur_nbr_list.get_vector(i), + other_label); + indicator[index] = true; + } + } + } else { + VLOG(10) << "No vertices with label: " + << std::to_string(src_other_label); + } + } + // extract vertices from nbrs, and add them to other_vertices[cur_hop] + // and update other_offset + auto& cur_other_vertices = other_vertices[cur_hop]; + auto& cur_other_offsets = + other_offsets[cur_hop]; // other_offset is always aligned with + // src_vertices. + auto& cur_other_labels_vec = other_labels_vec[cur_hop]; + size_t cur_hop_new_vnum = 0; + for (auto i = 0; i < nbr_lists.size(); ++i) { + for (auto& nbr_list_pair : nbr_lists[i]) { + cur_hop_new_vnum += nbr_list_pair.first.size(); + } + } + VLOG(10) << "cur_hop_new_vnum: " << cur_hop_new_vnum; + cur_other_vertices.reserve(cur_hop_new_vnum); + // cur_other_offsets.reserve(cur_hop_new_vnum); + cur_other_labels_vec.reserve(cur_hop_new_vnum); + cur_other_offsets.reserve(vertices_vec.size() + 1); + // cur_other_offsets.emplace_back(0); + std::vector tmp_cur_offset; + tmp_cur_offset.reserve(cur_hop_new_vnum); + tmp_cur_offset.emplace_back(0); + size_t cur_cnt = 0; + for (auto i = 0; i < nbr_lists.size(); ++i) { + for (auto& nbr_list_pair : nbr_lists[i]) { + auto cur_other_vertex_label = nbr_list_pair.second; + auto& nbr_list = nbr_list_pair.first; + for (auto j = 0; j < nbr_list.size(); ++j) { + auto& nbr = nbr_list[j]; + cur_other_vertices.emplace_back(nbr.neighbor()); + cur_other_labels_vec.emplace_back(cur_other_vertex_label); + } + cur_cnt += nbr_list.size(); + } + tmp_cur_offset.emplace_back(cur_cnt); + } + CHECK(cur_cnt == cur_hop_new_vnum); + for (auto i = 0; i < other_offsets[cur_hop - 1].size(); ++i) { + other_offsets[cur_hop].emplace_back( + tmp_cur_offset[other_offsets[cur_hop - 1][i]]); + } + } + + // select vertices that are in range and are in vertex_other_labels. + std::vector res_vertices; + std::vector res_offsets; + std::vector res_dists; + std::vector res_labels_vec; + std::vector valid_labels(sizeof(label_id_t) * 8, false); + for (auto& v_label : vertex_other_labels) { + CHECK(v_label < sizeof(label_id_t) * 8) + << "v_label: " << v_label << ", " << sizeof(label_id_t) * 8; + valid_labels[v_label] = true; + } + auto num_valid_labels = + std::accumulate(valid_labels.begin(), valid_labels.end(), 0); + VLOG(10) << "Select vertices within " << num_valid_labels + << " valid labels, from " << other_labels.size(); + + size_t flat_size = 0; + for (auto i = range.start_; i < range.limit_; ++i) { + flat_size += other_vertices[i].size(); + } + VLOG(10) << "PathExpandV from single label, flat size: " << flat_size; + res_vertices.reserve(flat_size); + res_dists.reserve(flat_size); + res_labels_vec.reserve(flat_size); + res_offsets.reserve(vertices_vec.size() + 1); + res_offsets.emplace_back(0); + for (auto i = 0; i < vertices_vec.size(); ++i) { + for (auto j = range.start_; j < range.limit_; ++j) { + auto start = other_offsets[j][i]; + auto end = other_offsets[j][i + 1]; + for (auto k = start; k < end; ++k) { + auto gid = other_vertices[j][k]; + auto label = other_labels_vec[j][k]; + if (valid_labels[label]) { + res_vertices.emplace_back(gid); + res_dists.emplace_back(j); + res_labels_vec.emplace_back(label); + } + } + } + res_offsets.emplace_back(res_vertices.size()); + } + return std::make_tuple(std::move(res_vertices), std::move(res_dists), + std::move(res_labels_vec), std::move(res_offsets)); + } + + // returns the vector of valid labels and the bitsets. + static std::pair, std::vector> + convert_label_id_vec_to_bitsets(const std::vector& label_vec) { + // convert label_id_vec to bitsets. + std::vector res_bitsets; + std::vector res_label_id_vec; + std::vector + label_to_index; // label to index in res_bitsets vector. + label_to_index.resize(sizeof(label_id_t) * 8, -1); + for (auto i = 0; i < label_vec.size(); ++i) { + if (label_to_index[label_vec[i]] == -1) { + label_to_index[label_vec[i]] = res_bitsets.size(); + res_bitsets.emplace_back(); + res_bitsets.back().init(label_vec.size()); + res_label_id_vec.emplace_back(label_vec[i]); + } + } + auto num_valid_labels = res_bitsets.size(); + VLOG(10) << "num valid labels: " << num_valid_labels; + + for (auto i = 0; i < label_vec.size(); ++i) { + auto index = label_to_index[label_vec[i]]; + CHECK(index != -1); + res_bitsets[index].set_bit(i); + } + CHECK(res_label_id_vec.size() == num_valid_labels); + return std::make_pair(std::move(res_bitsets), std::move(res_label_id_vec)); + } + template static auto prepend_tuple(std::vector&& first_col, std::vector>&& old_cols) { @@ -487,6 +908,31 @@ class PathExpand { } return res_vec; } + + static std::pair, std::vector> + get_vertices_with_label(const std::vector& vertices, + const std::vector& label_vec, + const label_id_t query_label) { + std::vector res_vertices; + std::vector indices; + for (auto i = 0; i < label_vec.size(); ++i) { + if (label_vec[i] == query_label) { + res_vertices.emplace_back(vertices[i]); + indices.emplace_back(i); + } + } + return std::make_pair(std::move(res_vertices), std::move(indices)); + } + + static std::vector label_key_vec_2_label_id_vec( + const std::vector& label_key_vec) { + std::vector res_vec; + res_vec.reserve(label_key_vec.size()); + for (auto& label_key : label_key_vec) { + res_vec.emplace_back(label_key.label_id); + } + return res_vec; + } }; } // namespace gs diff --git a/flex/engines/hqps_db/core/operator/project.h b/flex/engines/hqps_db/core/operator/project.h index 0f60d0a7c213..a2b48655761c 100644 --- a/flex/engines/hqps_db/core/operator/project.h +++ b/flex/engines/hqps_db/core/operator/project.h @@ -407,10 +407,11 @@ class ProjectOp { // general vertex set. template < - typename T, typename VID_T, typename LabelT, + typename T, typename VID_T, typename LabelT, typename... SET_T, typename std::enable_if<(!std::is_same_v)>::type* = nullptr> static auto apply_single_project_impl( - const GRAPH_INTERFACE& graph, GeneralVertexSet& node, + const GRAPH_INTERFACE& graph, + GeneralVertexSet& node, const std::string& prop_name_, const std::vector& repeat_array) { VLOG(10) << "start fetching properties"; auto tmp_prop_vec = get_property_tuple_general( diff --git a/flex/engines/hqps_db/core/operator/scan.h b/flex/engines/hqps_db/core/operator/scan.h index 5a67c1fc07a4..723600612953 100644 --- a/flex/engines/hqps_db/core/operator/scan.h +++ b/flex/engines/hqps_db/core/operator/scan.h @@ -50,10 +50,10 @@ class Scan { } template - static GeneralVertexSet ScanMultiLabelVertex( - const GRAPH_INTERFACE& graph, - const std::array& labels, - Filter&& filter) { + static GeneralVertexSet + ScanMultiLabelVertex(const GRAPH_INTERFACE& graph, + const std::array& labels, + Filter&& filter) { auto expr = filter.expr_; auto selectors = filter.selectors_; return scan_multi_label_vertex_with_selector(graph, labels, expr, @@ -139,7 +139,7 @@ class Scan { private: template - static GeneralVertexSet + static GeneralVertexSet scan_multi_label_vertex_with_selector( const GRAPH_INTERFACE& graph, const std::array& labels, const EXPR& expr, diff --git a/flex/engines/hqps_db/core/operator/sink.h b/flex/engines/hqps_db/core/operator/sink.h index b97b763a4770..50cf2650319e 100644 --- a/flex/engines/hqps_db/core/operator/sink.h +++ b/flex/engines/hqps_db/core/operator/sink.h @@ -720,12 +720,12 @@ class SinkOp { } // sink general vertex, we only return vertex ids. - template - static void sink_col_impl(const GRAPH_INTERFACE& graph, - results::CollectiveResults& results_vec, - const GeneralVertexSet& vertex_set, - const std::vector& repeat_offsets, - int32_t tag_id) { + template + static void sink_col_impl( + const GRAPH_INTERFACE& graph, results::CollectiveResults& results_vec, + const GeneralVertexSet& vertex_set, + const std::vector& repeat_offsets, int32_t tag_id) { auto& schema = graph.schema(); auto vertices_vec = vertex_set.GetVertices(); auto labels_vec = vertex_set.GetLabels(); diff --git a/flex/engines/hqps_db/core/params.h b/flex/engines/hqps_db/core/params.h index 3c2777401ba9..51e7bf4c2627 100644 --- a/flex/engines/hqps_db/core/params.h +++ b/flex/engines/hqps_db/core/params.h @@ -239,10 +239,10 @@ struct LengthKey { }; struct LabelKey { - using label_data_type = int32_t; + using label_data_type = uint8_t; int32_t label_id; LabelKey() = default; - LabelKey(int32_t id) : label_id(id) {} + LabelKey(label_data_type id) : label_id(id) {} }; template @@ -738,11 +738,45 @@ struct PathExpandOptImpl { ResultOpt result_opt_; // Get all vertices on Path or only ending vertices. }; +// Path expand with only one edge label, but possible multiple dst labels. +template +struct PathExpandMultiDstOptImpl { + PathExpandMultiDstOptImpl( + EdgeExpandOptMultiLabel&& + edge_expand_opt, + GetVOpt&& get_v_opt, + Range&& range, UNTIL_CONDITION&& until_condition, + PathOpt path_opt = PathOpt::Arbitrary, + ResultOpt result_opt = ResultOpt::EndV) + : edge_expand_opt_(std::move(edge_expand_opt)), + get_v_opt_(std::move(get_v_opt)), + range_(std::move(range)), + until_condition_(std::move(until_condition)), + path_opt_(path_opt), + result_opt_(result_opt) {} + + EdgeExpandOptMultiLabel edge_expand_opt_; + GetVOpt get_v_opt_; + Range range_; // Range for result vertices, default is [0,INT_MAX) + UNTIL_CONDITION until_condition_; + PathOpt path_opt_; // Single path or not. + ResultOpt result_opt_; // Get all vertices on Path or only ending vertices. +}; + template using PathExpandVOpt = PathExpandOptImpl, T...>; +template +using PathExpandVMultiDstOpt = + PathExpandMultiDstOptImpl, T...>; + template using PathExpandPOpt = PathExpandOptImpl>; @@ -766,6 +800,20 @@ auto make_path_expandv_opt( Filter(), path_opt, result_opt); } +template +auto make_path_expandv_opt( + EdgeExpandOptMultiLabel&& + edge_expand_opt, + GetVOpt&& get_v_opt, + Range&& range, PathOpt path_opt = PathOpt::Arbitrary, + ResultOpt result_opt = ResultOpt::EndV) { + return PathExpandVMultiDstOpt( + std::move(edge_expand_opt), std::move(get_v_opt), std::move(range), + Filter(), path_opt, result_opt); +} + template auto make_path_expandp_opt( EdgeExpandOpt&& edge_expand_opt, diff --git a/flex/engines/hqps_db/core/sync_engine.h b/flex/engines/hqps_db/core/sync_engine.h index 96dc908b7918..ea0d427913a1 100644 --- a/flex/engines/hqps_db/core/sync_engine.h +++ b/flex/engines/hqps_db/core/sync_engine.h @@ -95,7 +95,8 @@ class SyncEngine : public BaseEngine { typename... SELECTOR, typename std::enable_if<(append_opt == AppendOpt::Persist && num_labels != 2)>::type* = nullptr, - typename COL_T = GeneralVertexSet> + typename COL_T = + GeneralVertexSet> static Context ScanVertex( const GRAPH_INTERFACE& graph, std::array&& v_labels, @@ -111,7 +112,8 @@ class SyncEngine : public BaseEngine { typename... SELECTOR, typename std::enable_if<(append_opt == AppendOpt::Temp && num_labels != 2)>::type* = nullptr, - typename COL_T = GeneralVertexSet> + typename COL_T = + GeneralVertexSet> static Context ScanVertex( const GRAPH_INTERFACE& graph, std::array&& v_labels, @@ -185,11 +187,11 @@ class SyncEngine : public BaseEngine { return Context(std::move(v_set_tuple)); } - template ::type* = - nullptr, - typename COL_T = GeneralVertexSet> + template < + AppendOpt append_opt, typename OID_T, typename LabelT, size_t num_labels, + typename std::enable_if<(append_opt == AppendOpt::Persist)>::type* = + nullptr, + typename COL_T = GeneralVertexSet> static Context ScanVertexWithOid( const GRAPH_INTERFACE& graph, std::array v_labels, OID_T oid) { @@ -203,7 +205,7 @@ class SyncEngine : public BaseEngine { template < AppendOpt append_opt, typename LabelT, size_t num_labels, typename std::enable_if<(append_opt == AppendOpt::Temp)>::type* = nullptr, - typename COL_T = GeneralVertexSet> + typename COL_T = GeneralVertexSet> static Context ScanVertexWithOid( const GRAPH_INTERFACE& graph, std::array v_labels, int64_t oid) { @@ -421,6 +423,32 @@ class SyncEngine : public BaseEngine { // old context will be abandon here. } + template + static auto PathExpandV( + const GRAPH_INTERFACE& graph, + Context&& ctx, + PathExpandVMultiDstOpt&& + path_expand_opt) { + if (path_expand_opt.path_opt_ != PathOpt::Arbitrary) { + LOG(FATAL) << "Only support Arbitrary path now"; + } + if (path_expand_opt.result_opt_ != ResultOpt::EndV) { + LOG(FATAL) << "Only support EndV now"; + } + auto& select_node = gs::Get(ctx); + auto pair = PathExpand::PathExpandV( + graph, select_node, std::move(path_expand_opt)); + + // create new context node, update offsets. + return ctx.template AddNode(std::move(pair.first), + std::move(pair.second), alias_to_use); + // old context will be abandon here. + } + /// Expand to Path template , using keyed_set_t = KeyedRowVertexSetImpl; // // The builder type. using keyed_builder_t = - KeyedRowVertexSetBuilder; + KeyedRowVertexSetBuilder; using unkeyed_builder_t = typename KeyedRowVertexSetImpl::builder_t; static keyed_builder_t create_keyed_builder( @@ -221,16 +221,16 @@ struct KeyedAggT, }; // general vertex set to_count -template -struct KeyedAggT, AggFunc::COUNT, +template +struct KeyedAggT, AggFunc::COUNT, std::tuple, std::integer_sequence> { using agg_res_t = Collection; // build a counter array. using aggregate_res_builder_t = CountBuilder; static aggregate_res_builder_t create_agg_builder( - const GeneralVertexSet& set, const GI& graph, + const GeneralVertexSet& set, const GI& graph, std::tuple>& selectors) { return CountBuilder(); } diff --git a/flex/engines/hqps_db/core/utils/props.h b/flex/engines/hqps_db/core/utils/props.h index 311bc39292d5..5ef63881b8a5 100644 --- a/flex/engines/hqps_db/core/utils/props.h +++ b/flex/engines/hqps_db/core/utils/props.h @@ -51,7 +51,7 @@ class RowVertexSetImpl; template class TwoLabelVertexSetImpl; -template +template class GeneralVertexSet; template @@ -653,10 +653,10 @@ static auto create_prop_getter_impl( // get for common properties for keyed_row_vertex_set template -static auto create_prop_getter_impl(const GeneralVertexSet& set, - const GRAPH_INTERFACE& graph, - const std::string& prop_name) { + typename LabelT, typename VID_T, typename... SET_T> +static auto create_prop_getter_impl( + const GeneralVertexSet& set, + const GRAPH_INTERFACE& graph, const std::string& prop_name) { using prop_getter_t = typename GRAPH_INTERFACE::template single_prop_getter_t; // const std::array& labels = set.GetLabels(); @@ -669,7 +669,7 @@ static auto create_prop_getter_impl(const GeneralVertexSet& set, return GeneralVertexSetPropGetter< tag_id, prop_getter_t, - typename GeneralVertexSet::index_ele_tuple_t>( + typename GeneralVertexSet::index_ele_tuple_t>( std::move(prop_getters), set.GetBitsets()); } diff --git a/flex/engines/hqps_db/database/adj_list.h b/flex/engines/hqps_db/database/adj_list.h index f9b786d05c7c..0e8c6fa0a194 100644 --- a/flex/engines/hqps_db/database/adj_list.h +++ b/flex/engines/hqps_db/database/adj_list.h @@ -618,6 +618,7 @@ class Nbr { class NbrList { public: NbrList(const Nbr* b, const Nbr* e) : begin_(b), end_(e) {} + NbrList() : begin_(nullptr), end_(nullptr) {} ~NbrList() = default; const Nbr* begin() const { return begin_; } diff --git a/flex/engines/hqps_db/database/mutable_csr_interface.h b/flex/engines/hqps_db/database/mutable_csr_interface.h index a57d5e34a29e..995f3081989a 100644 --- a/flex/engines/hqps_db/database/mutable_csr_interface.h +++ b/flex/engines/hqps_db/database/mutable_csr_interface.h @@ -696,38 +696,39 @@ class MutableCSRInterface { const label_id_t& edge_label_id, const std::vector& vids, const std::string& direction_str, size_t limit) const { mutable_csr_graph_impl::NbrListArray ret; - + ret.resize(vids.size()); if (direction_str == "out" || direction_str == "Out" || direction_str == "OUT") { auto csr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, edge_label_id); - ret.resize(vids.size()); - for (size_t i = 0; i < vids.size(); ++i) { - auto v = vids[i]; - auto iter = csr->edge_iter(v); - auto& vec = ret.get_vector(i); - while (iter->is_valid()) { - vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); - iter->next(); + if (csr) { + for (size_t i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + auto iter = csr->edge_iter(v); + auto& vec = ret.get_vector(i); + while (iter->is_valid()) { + vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); + iter->next(); + } } } } else if (direction_str == "in" || direction_str == "In" || direction_str == "IN") { auto csr = db_session_.graph().get_ie_csr(dst_label_id, src_label_id, edge_label_id); - ret.resize(vids.size()); - for (size_t i = 0; i < vids.size(); ++i) { - auto v = vids[i]; - auto iter = csr->edge_iter(v); - auto& vec = ret.get_vector(i); - while (iter->is_valid()) { - vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); - iter->next(); + if (csr) { + for (size_t i = 0; i < vids.size(); ++i) { + auto v = vids[i]; + auto iter = csr->edge_iter(v); + auto& vec = ret.get_vector(i); + while (iter->is_valid()) { + vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); + iter->next(); + } } } } else if (direction_str == "both" || direction_str == "Both" || direction_str == "BOTH") { - ret.resize(vids.size()); auto ocsr = db_session_.graph().get_oe_csr(src_label_id, dst_label_id, edge_label_id); auto icsr = db_session_.graph().get_ie_csr(dst_label_id, src_label_id, @@ -735,15 +736,19 @@ class MutableCSRInterface { for (size_t i = 0; i < vids.size(); ++i) { auto v = vids[i]; auto& vec = ret.get_vector(i); - auto iter = ocsr->edge_iter(v); - while (iter->is_valid()) { - vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); - iter->next(); + if (ocsr) { + auto iter = ocsr->edge_iter(v); + while (iter->is_valid()) { + vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); + iter->next(); + } } - iter = icsr->edge_iter(v); - while (iter->is_valid()) { - vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); - iter->next(); + if (icsr) { + auto iter = icsr->edge_iter(v); + while (iter->is_valid()) { + vec.push_back(mutable_csr_graph_impl::Nbr(iter->get_neighbor())); + iter->next(); + } } } } else { diff --git a/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h index e736d43c231b..8c3b2b0c0e3c 100644 --- a/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h +++ b/flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h @@ -265,9 +265,10 @@ class FlatEdgeSet { size_t Size() const { return vec_.size(); } template - std::pair, std::vector> GetVertices( - VOpt v_opt, const std::array& labels, - FILTER_T& expr) const { + std::pair, + std::vector> + GetVertices(VOpt v_opt, const std::array& labels, + FILTER_T& expr) const { std::vector offsets; std::vector vids; offsets.reserve(Size()); diff --git a/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h b/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h index 3975584d1408..570da55cc0bc 100644 --- a/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h +++ b/flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h @@ -265,7 +265,8 @@ class UnTypedEdgeSet { const std::vector& GetLabelIndices() const { return label_indices_; } template - std::pair, std::vector> + std::pair, + std::vector> GetVertices(const GetVOpt>& get_v_opt) const { auto v_opt = get_v_opt.v_opt_; @@ -289,7 +290,8 @@ class UnTypedEdgeSet { } } - std::pair, std::vector> + std::pair, + std::vector> getSrcVertices(const std::vector& req_labels) const { std::vector ret; std::vector offset; @@ -328,7 +330,8 @@ class UnTypedEdgeSet { return std::make_pair(std::move(general_set), std::move(offset)); } - std::pair, std::vector> + std::pair, + std::vector> getDstVertices(const std::vector& req_labels) const { std::vector ret; std::vector offset; diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h index df086f248744..39fe43b70dea 100644 --- a/flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h +++ b/flex/engines/hqps_db/structures/multi_vertex_set/general_vertex_set.h @@ -110,6 +110,91 @@ auto general_project_vertices_impl( std::move(offset)); } +template +auto general_project_vertices_impl( + const std::vector& old_vec, + const std::vector& old_data_vec, + const std::vector& old_bit_sets, + const std::vector& old_labels, + const std::array& filter_labels, + const EXPR& expr, const std::vector& prop_getters) { + std::vector res_vec; + std::vector res_data_vec; + CHECK(old_bit_sets.size() == old_labels.size()); + CHECK(prop_getters.size() == old_labels.size()); + auto old_num_labels = old_bit_sets.size(); + std::vector res_bitsets(old_num_labels); + // reserve enough size for bitset. + for (auto i = 0; i < old_num_labels; ++i) { + res_bitsets[i].init(old_vec.size()); + } + std::vector select_label_id; + if constexpr (filter_num_labels == 0) { + for (auto i = 0; i < old_labels.size(); ++i) { + select_label_id.emplace_back(i); + } + } else { + std::unordered_set set; + for (auto l : filter_labels) { + set.insert(l); + } + for (auto i = 0; i < old_labels.size(); ++i) { + if (set.find(old_labels[i]) != set.end()) { + select_label_id.emplace_back(i); + } + } + } + VLOG(10) << "selected label ids: " << gs::to_string(select_label_id) + << ", out of size: " << old_labels.size(); + std::vector offset; + + offset.emplace_back(0); + for (auto i = 0; i < old_vec.size(); ++i) { + for (auto label_id : select_label_id) { + if (old_bit_sets[label_id].get_bit(i)) { + auto eles = prop_getters[label_id].get_view(old_vec[i]); + if constexpr (PROP_GETTER::prop_num == 0) { + if (expr()) { + res_bitsets[label_id].set_bit(res_vec.size()); + res_vec.push_back(old_vec[i]); + res_data_vec.push_back(old_data_vec[i]); + break; + } + } else { + if (expr(std::get<0>(eles))) { + res_bitsets[label_id].set_bit(res_vec.size()); + res_vec.push_back(old_vec[i]); + res_data_vec.push_back(old_data_vec[i]); + break; + } + } + } + } + offset.emplace_back(res_vec.size()); + } + for (auto i = 0; i < res_vec.size(); ++i) { + bool flag = false; + for (auto j = 0; j < old_num_labels; ++j) { + flag |= res_bitsets[j].get_bit(i); + } + CHECK(flag) << "check fail at ind: " << i; + } + // resize bitset. + for (auto i = 0; i < old_num_labels; ++i) { + res_bitsets[i].resize(res_vec.size()); + } + for (auto i = 0; i < res_vec.size(); ++i) { + bool flag = false; + for (auto j = 0; j < old_num_labels; ++j) { + flag |= res_bitsets[j].get_bit(i); + } + CHECK(flag) << "check fail at ind: " << i; + } + return std::make_tuple(std::move(res_vec), std::move(res_data_vec), + std::move(res_bitsets), std::move(offset)); +} + template auto general_project_vertices_no_expr_impl( const std::vector& old_vec, @@ -176,6 +261,76 @@ auto general_project_vertices_no_expr_impl( std::move(offset)); } +template +auto general_project_vertices_no_expr_impl( + const std::vector& old_vec, + const std::vector& old_data_vec, + const std::vector& old_bit_sets, + const std::vector& old_labels, + const std::array& filter_labels) { + auto old_num_labels = old_bit_sets.size(); + std::vector res_vec; + std::vector res_data_vec; + std::vector res_bitsets(old_num_labels); + // reserve enough size for bitset. + for (auto i = 0; i < old_num_labels; ++i) { + res_bitsets[i].init(old_vec.size()); + } + std::vector select_label_id; + if constexpr (filter_num_labels == 0) { + for (auto i = 0; i < old_labels.size(); ++i) { + select_label_id.emplace_back(i); + } + } else { + std::unordered_set set; + for (auto l : filter_labels) { + set.insert(l); + } + for (auto i = 0; i < old_labels.size(); ++i) { + if (set.find(old_labels[i]) != set.end()) { + select_label_id.emplace_back(i); + } + } + } + VLOG(10) << "selected label ids: " << gs::to_string(select_label_id) + << ", out of size: " << old_labels.size(); + std::vector offset; + + offset.emplace_back(0); + for (auto i = 0; i < old_vec.size(); ++i) { + for (auto label_id : select_label_id) { + if (old_bit_sets[label_id].get_bit(i)) { + res_bitsets[label_id].set_bit(res_vec.size()); + res_vec.push_back(old_vec[i]); + res_data_vec.push_back(old_data_vec[i]); + break; + } + } + offset.emplace_back(res_vec.size()); + } + for (auto i = 0; i < res_vec.size(); ++i) { + bool flag = false; + for (auto j = 0; j < old_num_labels; ++j) { + flag |= res_bitsets[j].get_bit(i); + } + CHECK(flag) << "check fail at ind: " << i; + } + // resize bitset. + for (auto i = 0; i < old_num_labels; ++i) { + res_bitsets[i].resize(res_vec.size()); + } + for (auto i = 0; i < res_vec.size(); ++i) { + bool flag = false; + for (auto j = 0; j < old_num_labels; ++j) { + flag |= res_bitsets[j].get_bit(i); + } + CHECK(flag) << "check fail at ind: " << i; + } + return std::make_tuple(std::move(res_vec), std::move(res_data_vec), + std::move(res_bitsets), std::move(offset)); +} + template auto general_project_with_repeat_array_impl( const KeyAlias& key_alias, @@ -194,6 +349,41 @@ auto general_project_with_repeat_array_impl( return res_vec; } +template +auto generalSetFlatImpl( + std::vector>& index_ele_tuples, + const std::vector& origin_vids, + const std::vector& origin_data, + const std::vector& origin_bitsets) { + size_t dst_size = index_ele_tuples.size(); + std::vector res_vids; + std::vector res_data_vec; + std::vector res_bitsets(origin_bitsets.size()); + res_vids.reserve(dst_size); + res_data_vec.reserve(dst_size); + for (auto i = 0; i < origin_bitsets.size(); ++i) { + res_bitsets[i].init(dst_size); + } + for (auto ele : index_ele_tuples) { + auto& cur = std::get(ele); + //(ind, vid) + auto ind = std::get<0>(cur); + CHECK(ind < origin_vids.size()); + + for (auto i = 0; i < origin_bitsets.size(); ++i) { + if (origin_bitsets[i].get_bit(ind)) { + res_bitsets[i].set_bit(res_vids.size()); + break; + } + } + res_vids.emplace_back(origin_vids[ind]); + res_data_vec.emplace_back(origin_data[ind]); + } + return std::make_tuple(std::move(res_vids), std::move(res_data_vec), + std::move(res_bitsets)); +} + template auto generalSetFlatImpl( std::vector>& index_ele_tuples, @@ -223,11 +413,75 @@ auto generalSetFlatImpl( return std::make_pair(std::move(res_vids), std::move(res_bitsets)); } -template +template class GeneralVertexSetIter { public: using lid_t = VID_T; - using self_type_t = GeneralVertexSetIter; + using self_type_t = GeneralVertexSetIter; + using index_ele_tuple_t = std::tuple>; + using data_tuple_t = std::tuple>; + + GeneralVertexSetIter(const std::vector& vec, + const std::vector>& data_vec, + const std::vector& prop_names, + const std::vector& bitsets, size_t ind) + : vec_(vec), + data_vec_(data_vec), + prop_names_(prop_names), + bitsets_(bitsets), + ind_(ind) {} + + lid_t GetElement() const { return vec_[ind_]; } + + data_tuple_t GetData() const { return vec_[ind_]; } + + index_ele_tuple_t GetIndexElement() const { + return std::make_tuple(ind_, vec_[ind_], data_vec_[ind_]); + } + + lid_t GetVertex() const { return vec_[ind_]; } + + inline const self_type_t& operator++() { + ++ind_; + return *this; + } + + inline self_type_t operator++(int) { + self_type_t ret(*this); + ++ind_; + return ret; + } + + // We may never compare to other kind of iterators + inline bool operator==(const self_type_t& rhs) const { + return ind_ == rhs.ind_; + } + + inline bool operator!=(const self_type_t& rhs) const { + return ind_ != rhs.ind_; + } + + inline bool operator<(const self_type_t& rhs) const { + return ind_ < rhs.ind_; + } + + inline const self_type_t& operator*() const { return *this; } + + inline const self_type_t* operator->() const { return this; } + + private: + const std::vector& vec_; + const std::vector>& data_vec_; + const std::vector& prop_names_; + const std::vector& bitsets_; + size_t ind_; +}; + +template +class GeneralVertexSetIter { + public: + using lid_t = VID_T; + using self_type_t = GeneralVertexSetIter; using index_ele_tuple_t = std::tuple; using data_tuple_t = std::tuple; @@ -279,18 +533,14 @@ class GeneralVertexSetIter { size_t ind_; }; -/// @brief GeneralVertexSet are designed for the case we need to store multiple -/// label vertex in a mixed manner -/// @tparam VID_T -/// @tparam LabelT -template +template class GeneralVertexSet { public: using lid_t = VID_T; - using self_type_t = GeneralVertexSet; - using iterator = GeneralVertexSetIter; - using index_ele_tuple_t = std::tuple; - using data_tuple_t = std::tuple; + using self_type_t = GeneralVertexSet; + using iterator = GeneralVertexSetIter; + using index_ele_tuple_t = std::tuple>; + using data_tuple_t = std::tuple>; using flat_t = self_type_t; using EntityValueType = VID_T; @@ -299,10 +549,17 @@ class GeneralVertexSet { static constexpr bool is_general_set = true; static constexpr bool is_collection = false; static constexpr bool is_multi_label = false; - GeneralVertexSet(std::vector&& vec, std::vector&& label_names, + GeneralVertexSet(std::vector&& vec, + std::vector>&& data_vec, + std::vector&& prop_names, + std::vector&& label_names, std::vector&& bitsets) - : vec_(std::move(vec)), label_names_(std::move(label_names)) { + : vec_(std::move(vec)), + data_vec_(std::move(data_vec)), + prop_names_(std::move(prop_names)), + label_names_(std::move(label_names)) { CHECK(label_names_.size() == bitsets.size()); + CHECK(vec_.size() == data_vec_.size()); bitsets_.resize(bitsets.size()); for (auto i = 0; i < bitsets.size(); ++i) { bitsets_[i].swap(bitsets[i]); @@ -316,6 +573,8 @@ class GeneralVertexSet { GeneralVertexSet(GeneralVertexSet&& other) : vec_(std::move(other.vec_)), + data_vec_(std::move(other.data_vec_)), + prop_names_(std::move(other.prop_names_)), label_names_(std::move(other.label_names_)) { bitsets_.resize(other.bitsets_.size()); for (auto i = 0; i < bitsets_.size(); ++i) { @@ -328,7 +587,10 @@ class GeneralVertexSet { } GeneralVertexSet(const GeneralVertexSet& other) - : vec_(other.vec_), label_names_(other.label_names_) { + : vec_(other.vec_), + data_vec_(other.data_vec_), + prop_names_(other.prop_names_), + label_names_(other.label_names_) { bitsets_.resize(other.bitsets_.size()); for (auto i = 0; i < bitsets_.size(); ++i) { bitsets_[i].copy(other.bitsets_[i]); @@ -339,9 +601,13 @@ class GeneralVertexSet { } } - iterator begin() const { return iterator(vec_, bitsets_, 0); } + iterator begin() const { + return iterator(vec_, data_vec_, prop_names_, bitsets_, 0); + } - iterator end() const { return iterator(vec_, bitsets_, vec_.size()); } + iterator end() const { + return iterator(vec_, data_vec_, prop_names_, bitsets_, vec_.size()); + } template (tuple)), - std::move(copied_label_names), - std::move(std::get<1>(tuple))); - return std::make_pair(std::move(set), std::move(std::get<2>(tuple))); + auto copied_prop_names(prop_names_); + auto set = self_type_t( + std::move(std::get<0>(tuple)), std::move(std::get<1>(tuple)), + std::move(copied_prop_names), std::move(copied_label_names), + std::move(std::get<2>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<3>(tuple))); } // project without expression. @@ -369,12 +638,14 @@ class GeneralVertexSet { // certain rows. auto tuple = general_project_vertices_no_expr_impl( - vec_, bitsets_, label_names_, filter_labels); + vec_, data_vec_, bitsets_, label_names_, filter_labels); auto copied_label_names(label_names_); - auto set = self_type_t(std::move(std::get<0>(tuple)), - std::move(copied_label_names), - std::move(std::get<1>(tuple))); - return std::make_pair(std::move(set), std::move(std::get<2>(tuple))); + auto copied_prop_names(prop_names_); + auto set = self_type_t( + std::move(std::get<0>(tuple)), std::move(std::get<1>(tuple)), + std::move(copied_prop_names), std::move(copied_label_names), + std::move(std::get<2>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<3>(tuple))); } const std::vector& GetLabels() const { return label_names_; } @@ -449,13 +720,16 @@ class GeneralVertexSet { // subset with indices. void SubSetWithIndices(std::vector& indices) { std::vector res_vec; + std::vector> res_data_vec; std::vector res_bitsets(bitsets_.size()); for (auto& i : res_bitsets) { i.init(indices.size()); } res_vec.reserve(indices.size()); + res_data_vec.reserve(indices.size()); for (auto i : indices) { res_vec.emplace_back(vec_[i]); + res_data_vec.emplace_back(data_vec_[i]); for (auto j = 0; j < bitsets_.size(); ++j) { if (bitsets_[j].get_bit(i)) { res_bitsets[j].set_bit(i); @@ -464,6 +738,7 @@ class GeneralVertexSet { } } vec_.swap(res_vec); + data_vec_.swap(res_data_vec); for (auto i = 0; i < bitsets_.size(); ++i) { bitsets_[i].swap(res_bitsets[i]); } @@ -474,6 +749,7 @@ class GeneralVertexSet { self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, KeyAlias& key_alias) const { std::vector next_vids; + std::vector> next_data_vec; size_t next_size = 0; for (auto i = 0; i < repeat_array.size(); ++i) { next_size += repeat_array[i]; @@ -482,6 +758,7 @@ class GeneralVertexSet { << " Project self, next size: " << next_size; next_vids.reserve(next_size); + next_data_vec.reserve(next_size); std::vector next_sets(bitsets_.size()); for (auto& i : next_sets) { i.init(next_size); @@ -500,12 +777,15 @@ class GeneralVertexSet { // VLOG(10) << "Project: " << vids_[i]; next_sets[ind].set_bit(next_vids.size()); next_vids.push_back(vec_[i]); + next_data_vec.push_back(data_vec_[i]); } } auto copied_label_names(label_names_); - return self_type_t(std::move(next_vids), std::move(copied_label_names), - std::move(next_sets)); + auto copied_prop_names(prop_names_); + return self_type_t(std::move(next_vids), std::move(next_data_vec), + std::move(copied_prop_names), + std::move(copied_label_names), std::move(next_sets)); } void Repeat(std::vector& cur_offset, @@ -514,6 +794,7 @@ class GeneralVertexSet { CHECK(cur_offset.back() == vec_.back()) << "neq : " << cur_offset.back() << ", " << vec_.back(); std::vector res_vec; + std::vector> res_data_vec; std::vector res_bitsets(bitsets_.size()); size_t total_cnt = repeat_vec.back(); VLOG(10) << "Repeat current vertices num: " << vec_.size() << ", to " @@ -525,12 +806,14 @@ class GeneralVertexSet { auto label_indices = GenerateLabelIndices(); size_t cur_ind = 0; res_vec.reserve(repeat_vec.back()); + res_data_vec.reserve(repeat_vec.back()); for (auto i = 0; i + 1 < cur_offset.size(); ++i) { auto times_to_repeat = repeat_vec[i + 1] - repeat_vec[i]; auto num_ele = cur_offset[i + 1] - cur_offset[i]; for (auto j = 0; j < times_to_repeat; ++j) { for (auto k = cur_offset[i]; k < cur_offset[i + 1]; ++k) { res_vec.emplace_back(vec_[k]); + res_data_vec.emplace_back(data_vec_[k]); CHECK(label_indices[k] < res_bitsets.size()); res_bitsets[label_indices[k]].set_bit(cur_ind++); } @@ -539,6 +822,7 @@ class GeneralVertexSet { CHECK(cur_ind == repeat_vec.back()); } vec_.swap(res_vec); + data_vec_.swap(res_data_vec); bitsets_.swap(res_bitsets); VLOG(10) << "Finish Repeat general vertex"; } @@ -549,21 +833,366 @@ class GeneralVertexSet { static_assert(col_ind < std::tuple_size_v>); auto res_vids_and_data_tuples = - generalSetFlatImpl(index_ele_tuple, vec_, bitsets_); + generalSetFlatImpl(index_ele_tuple, vec_, data_vec_, bitsets_); auto labels_copied(label_names_); - return self_type_t(std::move(res_vids_and_data_tuples.first), - std::move(labels_copied), - std::move(res_vids_and_data_tuples.second)); + auto prop_names_copied(prop_names_); + return self_type_t(std::move(std::get<0>(res_vids_and_data_tuples)), + std::move(std::get<1>(res_vids_and_data_tuples)), + std::move(prop_names_copied), std::move(labels_copied), + std::move(std::get<2>(res_vids_and_data_tuples))); } - template + template void fillBuiltinPropsImpl(std::vector>& tuples, - std::string& prop_name, + const std::string& prop_name, std::vector& repeat_array) const { - if constexpr (std::is_same_v>, - Dist>) { - if (prop_name == "dist") { - LOG(FATAL) << "Not supported"; + using cur_prop = std::tuple_element_t>; + using my_prop = std::tuple_element_t>; + if constexpr (std::is_same_v) { + CHECK(MyIs < prop_names_.size()); + if (prop_name == prop_names_[MyIs]) { + VLOG(10) << "Found builtin property " << prop_name; + CHECK(repeat_array.size() == data_vec_.size()); + size_t ind = 0; + for (auto i = 0; i < repeat_array.size(); ++i) { + for (auto j = 0; j < repeat_array[i]; ++j) { + std::get(tuples[ind]) = std::get<0>(data_vec_[i]); + ind += 1; + } + } + } + } + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + std::string& prop_name, + std::vector& repeat_array, + std::index_sequence) const { + (fillBuiltinPropsImpl(tuples, prop_name, repeat_array), ...); + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array, + std::index_sequence) const { + (fillBuiltinPropsImpl(tuples, std::get(prop_names), + repeat_array, + std::index_sequence()), + ...); + } + + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names, + std::vector& repeat_array) const { + fillBuiltinPropsImpl(tuples, prop_names, repeat_array, + std::make_index_sequence()); + } + + // No repeat array is not provided + template + void fillBuiltinProps(std::vector>& tuples, + PropNameArray& prop_names) { + // TO be implemented. + } + + size_t Size() const { return vec_.size(); } + + private: + std::vector vec_; + std::vector> data_vec_; + std::vector label_names_; + std::vector prop_names_; + std::vector bitsets_; +}; + +/// @brief GeneralVertexSet are designed for the case we need to store multiple +/// label vertex in a mixed manner +/// @tparam VID_T +/// @tparam LabelT +template +class GeneralVertexSet { + public: + using lid_t = VID_T; + using self_type_t = GeneralVertexSet; + using iterator = GeneralVertexSetIter; + using index_ele_tuple_t = std::tuple; + using data_tuple_t = std::tuple; + using flat_t = self_type_t; + using EntityValueType = VID_T; + + static constexpr bool is_vertex_set = true; + static constexpr bool is_two_label_set = false; + static constexpr bool is_general_set = true; + static constexpr bool is_collection = false; + static constexpr bool is_multi_label = false; + GeneralVertexSet(std::vector&& vec, std::vector&& label_names, + std::vector&& bitsets) + : vec_(std::move(vec)), label_names_(std::move(label_names)) { + CHECK(label_names_.size() == bitsets.size()); + bitsets_.resize(bitsets.size()); + for (auto i = 0; i < bitsets.size(); ++i) { + bitsets_[i].swap(bitsets[i]); + } + if (bitsets_.size() > 0) { + VLOG(10) << "[GeneralVertexSet], size: " << vec_.size() + << ", bitset size: " << bitsets_[0].cardinality() + << ", ind_ele_t: " << demangle(index_ele_tuple_t()); + } + } + + GeneralVertexSet(GeneralVertexSet&& other) + : vec_(std::move(other.vec_)), + label_names_(std::move(other.label_names_)) { + bitsets_.resize(other.bitsets_.size()); + for (auto i = 0; i < bitsets_.size(); ++i) { + bitsets_[i].swap(other.bitsets_[i]); + } + if (bitsets_.size() > 0) { + VLOG(10) << "[GeneralVertexSet], size: " << vec_.size() + << ", bitset size: " << bitsets_[0].cardinality(); + } + } + + GeneralVertexSet(const GeneralVertexSet& other) + : vec_(other.vec_), label_names_(other.label_names_) { + bitsets_.resize(other.bitsets_.size()); + for (auto i = 0; i < bitsets_.size(); ++i) { + bitsets_[i].copy(other.bitsets_[i]); + } + if (bitsets_.size() > 0) { + VLOG(10) << "[GeneralVertexSet], size: " << vec_.size() + << ", bitset size: " << bitsets_[0].cardinality(); + } + } + + iterator begin() const { return iterator(vec_, bitsets_, 0); } + + iterator end() const { return iterator(vec_, bitsets_, vec_.size()); } + + template >> + RES_T project_vertices(std::array& filter_labels, + EXPRESSION& exprs, + std::vector& prop_getter) const { + // TODO: vector-based cols should be able to be selected with + // certain rows. + + auto tuple = general_project_vertices_impl( + vec_, bitsets_, label_names_, filter_labels, exprs, prop_getter); + auto copied_label_names(label_names_); + auto set = self_type_t(std::move(std::get<0>(tuple)), + std::move(copied_label_names), + std::move(std::get<1>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<2>(tuple))); + } + + // project without expression. + template >> + RES_T project_vertices(std::array& filter_labels) const { + // TODO: vector-based cols should be able to be selected with + // certain rows. + + auto tuple = general_project_vertices_no_expr_impl( + vec_, bitsets_, label_names_, filter_labels); + auto copied_label_names(label_names_); + auto set = self_type_t(std::move(std::get<0>(tuple)), + std::move(copied_label_names), + std::move(std::get<1>(tuple))); + return std::make_pair(std::move(set), std::move(std::get<2>(tuple))); + } + + const std::vector& GetLabels() const { return label_names_; } + + LabelT GetLabel(size_t i) const { return label_names_[i]; } + + const std::vector GetLabelVec() const { + std::vector res; + // fill res with vertex labels + for (auto i = 0; i < vec_.size(); ++i) { + for (auto j = 0; j < bitsets_.size(); ++j) { + if (bitsets_[j].get_bit(i)) { + res.emplace_back(label_names_[j]); + break; + } + } + } + return res; + } + + // generate label indices. + std::vector GenerateLabelIndices() const { + std::vector label_indices; + label_indices.resize(vec_.size(), 255); + for (auto i = 0; i < bitsets_.size(); ++i) { + for (auto j = 0; j < bitsets_[i].cardinality(); ++j) { + if (bitsets_[i].get_bit(j)) { + CHECK(label_indices[j] == 255); + label_indices[j] = i; + } + } + } + return label_indices; + } + + const std::vector& GetBitsets() const { return bitsets_; } + + const std::vector& GetVertices() const { return vec_; } + + std::pair, std::vector> GetVerticesWithLabel( + label_t label_id) const { + // find label_id in label_names_ + auto it = std::find(label_names_.begin(), label_names_.end(), label_id); + if (it == label_names_.end()) { + return std::make_pair(std::vector(), std::vector()); + } else { + auto ind = std::distance(label_names_.begin(), it); + return GetVerticesWithIndex(ind); + } + } + + std::pair, std::vector> GetVerticesWithIndex( + size_t ind) const { + CHECK(ind < bitsets_.size()); + std::vector res; + std::vector active_ind; + size_t cnt = bitsets_[ind].count(); + res.reserve(cnt); + active_ind.reserve(cnt); + for (auto i = 0; i < bitsets_[ind].cardinality(); ++i) { + if (bitsets_[ind].get_bit(i)) { + res.push_back(vec_[i]); + active_ind.push_back(i); + } + } + VLOG(10) << "Got vertices of tag: " << ind + << ", res vertices: " << res.size() + << ", active_ind: " << active_ind.size(); + return std::make_pair(std::move(res), std::move(active_ind)); + } + + // subset with indices. + void SubSetWithIndices(std::vector& indices) { + std::vector res_vec; + std::vector res_bitsets(bitsets_.size()); + for (auto& i : res_bitsets) { + i.init(indices.size()); + } + res_vec.reserve(indices.size()); + for (auto i : indices) { + res_vec.emplace_back(vec_[i]); + for (auto j = 0; j < bitsets_.size(); ++j) { + if (bitsets_[j].get_bit(i)) { + res_bitsets[j].set_bit(i); + break; + } + } + } + vec_.swap(res_vec); + for (auto i = 0; i < bitsets_.size(); ++i) { + bitsets_[i].swap(res_bitsets[i]); + } + } + + template ::type* = nullptr> + self_type_t ProjectWithRepeatArray(const std::vector& repeat_array, + KeyAlias& key_alias) const { + std::vector next_vids; + size_t next_size = 0; + for (auto i = 0; i < repeat_array.size(); ++i) { + next_size += repeat_array[i]; + } + VLOG(10) << "[GeneralVertexSet] size: " << Size() + << " Project self, next size: " << next_size; + + next_vids.reserve(next_size); + std::vector next_sets(bitsets_.size()); + for (auto& i : next_sets) { + i.init(next_size); + } + VLOG(10) << "after init"; + for (auto i = 0; i < repeat_array.size(); ++i) { + size_t ind = 0; + while (ind < bitsets_.size()) { + if (bitsets_[ind].get_bit(i)) { + break; + } + ind += 1; + } + CHECK(ind < bitsets_.size()); + for (auto j = 0; j < repeat_array[i]; ++j) { + // VLOG(10) << "Project: " << vids_[i]; + next_sets[ind].set_bit(next_vids.size()); + next_vids.push_back(vec_[i]); + } + } + + auto copied_label_names(label_names_); + return self_type_t(std::move(next_vids), std::move(copied_label_names), + std::move(next_sets)); + } + + void Repeat(std::vector& cur_offset, + std::vector& repeat_vec) { + CHECK(cur_offset.size() == repeat_vec.size()); + CHECK(cur_offset.back() == vec_.back()) + << "neq : " << cur_offset.back() << ", " << vec_.back(); + std::vector res_vec; + std::vector res_bitsets(bitsets_.size()); + size_t total_cnt = repeat_vec.back(); + VLOG(10) << "Repeat current vertices num: " << vec_.size() << ", to " + << total_cnt; + for (auto i = 0; i < res_bitsets.size(); ++i) { + res_bitsets[i].init(total_cnt); + } + { + auto label_indices = GenerateLabelIndices(); + size_t cur_ind = 0; + res_vec.reserve(repeat_vec.back()); + for (auto i = 0; i + 1 < cur_offset.size(); ++i) { + auto times_to_repeat = repeat_vec[i + 1] - repeat_vec[i]; + auto num_ele = cur_offset[i + 1] - cur_offset[i]; + for (auto j = 0; j < times_to_repeat; ++j) { + for (auto k = cur_offset[i]; k < cur_offset[i + 1]; ++k) { + res_vec.emplace_back(vec_[k]); + CHECK(label_indices[k] < res_bitsets.size()); + res_bitsets[label_indices[k]].set_bit(cur_ind++); + } + } + } + CHECK(cur_ind == repeat_vec.back()); + } + vec_.swap(res_vec); + bitsets_.swap(res_bitsets); + VLOG(10) << "Finish Repeat general vertex"; + } + + // Usually after sort. + template + flat_t Flat(std::vector>& index_ele_tuple) { + static_assert(col_ind < + std::tuple_size_v>); + auto res_vids_and_data_tuples = + generalSetFlatImpl(index_ele_tuple, vec_, bitsets_); + auto labels_copied(label_names_); + return self_type_t(std::move(res_vids_and_data_tuples.first), + std::move(labels_copied), + std::move(res_vids_and_data_tuples.second)); + } + + template + void fillBuiltinPropsImpl(std::vector>& tuples, + std::string& prop_name, + std::vector& repeat_array) const { + if constexpr (std::is_same_v>, + Dist>) { + if (prop_name == "dist") { + LOG(FATAL) << "Not supported"; } } } @@ -607,8 +1236,8 @@ template auto make_general_set(std::vector&& vec, std::vector&& label_names, std::vector&& bitsets) { - return GeneralVertexSet(std::move(vec), std::move(label_names), - std::move(bitsets)); + return GeneralVertexSet( + std::move(vec), std::move(label_names), std::move(bitsets)); } template @@ -616,10 +1245,21 @@ auto make_general_set(std::vector&& vec, const std::vector& label_names, std::vector&& bitsets) { auto copied_label_names(label_names); - return GeneralVertexSet( + return GeneralVertexSet( std::move(vec), std::move(copied_label_names), std::move(bitsets)); } +template +auto make_general_set(std::vector&& vec, + std::vector>&& data_vec, + std::vector&& prop_names, + std::vector&& label_names, + std::vector&& bitsets) { + return GeneralVertexSet( + std::move(vec), std::move(data_vec), std::move(prop_names), + std::move(label_names), std::move(bitsets)); +} + static std::vector> bitsets_to_vids_inds( const std::vector& bitset) { auto num_labels = bitset.size(); @@ -648,11 +1288,12 @@ static std::vector> bitsets_to_vids_inds( return res; } -template +template static auto get_property_tuple_general( const GRAPH_INTERFACE& graph, - const GeneralVertexSet& - general_set, + const GeneralVertexSet& general_set, const std::array& prop_names) { auto label_vec = general_set.GetLabels(); auto vids_inds = bitsets_to_vids_inds(general_set.GetBitsets()); @@ -663,11 +1304,12 @@ static auto get_property_tuple_general( return data_tuples; } -template +template static auto get_property_tuple_general( const GRAPH_INTERFACE& graph, - const GeneralVertexSet& - general_set, + const GeneralVertexSet& general_set, const std::tuple...>& named_prop) { std::array prop_names; int ind = 0; diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h index 170a10972e41..8002bf4bf083 100644 --- a/flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h +++ b/flex/engines/hqps_db/structures/multi_vertex_set/keyed_row_vertex_set.h @@ -569,7 +569,7 @@ class KeyedRowVertexSetImpl { LabelT GetLabel() const { return v_label_; } - const std::vector GetLabelVec() { + std::vector GetLabelVec() const { std::vector res; // fill res with vertex labels res.reserve(vids_.size()); @@ -778,7 +778,7 @@ class KeyedRowVertexSetImpl { LabelT GetLabel() const { return v_label_; } - const std::vector GetLabelVec() { + std::vector GetLabelVec() const { std::vector res; // fill res with vertex labels res.reserve(vids_.size()); diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h index 696eda5146c9..6b8f3b93575c 100644 --- a/flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h +++ b/flex/engines/hqps_db/structures/multi_vertex_set/row_vertex_set.h @@ -950,7 +950,7 @@ class RowVertexSetImpl { const LabelT& GetLabel() const { return v_label_; } - const std::vector GetLabelVec() { + std::vector GetLabelVec() const { std::vector res; // fill res with v_label_ res.reserve(vids_.size()); @@ -1254,7 +1254,7 @@ class RowVertexSetImpl { const LabelT& GetLabel() const { return v_label_; } - const std::vector GetLabelVec() { + std::vector GetLabelVec() const { std::vector res; // fill res with v_label_ res.reserve(vids_.size()); diff --git a/flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h b/flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h index 8164832c0583..11f989370466 100644 --- a/flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h +++ b/flex/engines/hqps_db/structures/multi_vertex_set/two_label_vertex_set.h @@ -744,7 +744,7 @@ class TwoLabelVertexSetImpl { const std::array& GetLabels() const { return label_names_; } - const std::vector GetLabelVec() { + std::vector GetLabelVec() const { std::vector res; // fill with each vertex's label for (auto i = 0; i < vec_.size(); ++i) { @@ -1019,8 +1019,8 @@ class TwoLabelVertexSetImpl { const std::array& GetLabels() const { return label_names_; } - const std::vector GetLabelVec() { - std::vector res; + std::vector GetLabelVec() const { + std::vector res; // fill with each vertex's label for (auto i = 0; i < vec_.size(); ++i) { if (bitset_.get_bit(i)) { diff --git a/flex/resources/queries/ic/adhoc/simple_match_11.cypher b/flex/resources/queries/ic/adhoc/simple_match_11.cypher new file mode 100644 index 000000000000..be53017db261 --- /dev/null +++ b/flex/resources/queries/ic/adhoc/simple_match_11.cypher @@ -0,0 +1 @@ +MATCH(a)-[:REPLYOF*0..3]->(b) return a.content, b.content LIMIT 10; \ No newline at end of file diff --git a/flex/tests/hqps/match_query.h b/flex/tests/hqps/match_query.h index 054a55518f6d..9b533bcedf48 100644 --- a/flex/tests/hqps/match_query.h +++ b/flex/tests/hqps/match_query.h @@ -828,5 +828,46 @@ class MatchQuery13 : public HqpsAppBase { } }; +// Auto generated query class definition +class MatchQuery14 : public HqpsAppBase { + public: + using Engine = SyncEngine; + using label_id_t = typename gs::MutableCSRInterface::label_id_t; + using vertex_id_t = typename gs::MutableCSRInterface::vertex_id_t; + // Query function for query class + results::CollectiveResults Query(const gs::MutableCSRInterface& graph) const { + auto expr0 = gs::make_filter(Query0expr0()); + auto ctx0 = Engine::template ScanVertex( + graph, 2, std::move(expr0)); + + auto edge_expand_opt1 = gs::make_edge_expandv_opt( + gs::Direction::Out, (label_id_t) 2, + std::array{(label_id_t) 2, (label_id_t) 3}); + + auto get_v_opt0 = make_getv_opt( + gs::VOpt::Itself, + std::array{(label_id_t) 2, (label_id_t) 3}); + + auto path_opt2 = gs::make_path_expandv_opt( + std::move(edge_expand_opt1), std::move(get_v_opt0), gs::Range(0, 3)); + auto ctx1 = Engine::PathExpandV( + graph, std::move(ctx0), std::move(path_opt2)); + auto ctx2 = Engine::Project( + graph, std::move(ctx1), + std::tuple{gs::make_mapper_with_variable( + gs::PropertySelector("id")), + gs::make_mapper_with_variable( + gs::PropertySelector("id"))}); + return Engine::Sink(graph, ctx2, std::array{2, 3}); + } + // Wrapper query function for query class + results::CollectiveResults Query(const gs::MutableCSRInterface& graph, + Decoder& decoder) const override { + // decoding params from decoder, and call real query func + + return Query(graph); + } +}; + } // namespace gs #endif // TESTS_HQPS_MATCH_QUERY_H_ \ No newline at end of file diff --git a/flex/tests/hqps/query_test.cc b/flex/tests/hqps/query_test.cc index 8351574c699e..c803d3cdc357 100644 --- a/flex/tests/hqps/query_test.cc +++ b/flex/tests/hqps/query_test.cc @@ -227,5 +227,18 @@ int main(int argc, char** argv) { LOG(INFO) << "Finish MatchQuery12 test"; } + { + gs::MatchQuery14 query; + std::vector encoder_array; + gs::Encoder input_encoder(encoder_array); + std::vector output_array; + gs::Encoder output(output_array); + gs::Decoder input(encoder_array.data(), encoder_array.size()); + + gs::MutableCSRInterface graph(sess); + query.Query(graph, input); + LOG(INFO) << "Finish MatchQuery14 test"; + } + LOG(INFO) << "Finish context test."; } \ No newline at end of file diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java index 411f297eecbf..65e44e56585a 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java @@ -151,4 +151,16 @@ public static QueryContext get_simple_match_query_11_test() { "Record<{p: node<4>}>"); return new QueryContext(query, expected); } + +public static QueryContext get_simple_match_query_12_test() { + String query = "MATCH(p)<-[:ISLOCATEDIN*1..2]-(a : POST | COMMENT) WITH DISTINCT p, a RETURN p.id AS placeId, p.name AS placeName, a.id AS postOrCommentId ORDER BY placeId ASC, postOrCommentId ASC LIMIT 5;"; + List expected = + Arrays.asList( + "Record<{placeId: 0, placeName: \"India\", postOrCommentId: 54780}>", + "Record<{placeId: 0, placeName: \"India\", postOrCommentId: 54971}>", + "Record<{placeId: 0, placeName: \"India\", postOrCommentId: 54972}>", + "Record<{placeId: 0, placeName: \"India\", postOrCommentId: 54973}>", + "Record<{placeId: 0, placeName: \"India\", postOrCommentId: 54974}>"); + return new QueryContext(query, expected); + } } diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java index c9d95db0ac59..691ee96ec36a 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java @@ -114,6 +114,14 @@ public void run_simple_match_11_test() { Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); } + @Test + public void run_simple_match_12_test() { + QueryContext testQuery = SimpleMatchQueries.get_simple_match_query_12_test(); + Result result = session.run(testQuery.getQuery()); + Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); + } + + @AfterClass public static void afterClass() { if (session != null) { From d37d5df03c5a9e131ed8be2b20a6ee0b5a708f05 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Tue, 7 Nov 2023 14:34:56 +0800 Subject: [PATCH 2/6] format --- .../integration/suite/simple/SimpleMatchQueries.java | 7 +++++-- .../cypher/integration/ldbc/SimpleMatchTest.java | 1 - 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java index 65e44e56585a..c447b51a3e9d 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java @@ -152,8 +152,11 @@ public static QueryContext get_simple_match_query_11_test() { return new QueryContext(query, expected); } -public static QueryContext get_simple_match_query_12_test() { - String query = "MATCH(p)<-[:ISLOCATEDIN*1..2]-(a : POST | COMMENT) WITH DISTINCT p, a RETURN p.id AS placeId, p.name AS placeName, a.id AS postOrCommentId ORDER BY placeId ASC, postOrCommentId ASC LIMIT 5;"; + public static QueryContext get_simple_match_query_12_test() { + String query = + "MATCH(p)<-[:ISLOCATEDIN*1..2]-(a : POST | COMMENT) WITH DISTINCT p, a RETURN p.id" + + " AS placeId, p.name AS placeName, a.id AS postOrCommentId ORDER BY placeId" + + " ASC, postOrCommentId ASC LIMIT 5;"; List expected = Arrays.asList( "Record<{placeId: 0, placeName: \"India\", postOrCommentId: 54780}>", diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java index 691ee96ec36a..c05c8746c0a1 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java @@ -121,7 +121,6 @@ public void run_simple_match_12_test() { Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); } - @AfterClass public static void afterClass() { if (session != null) { From 1d7296069b338f48ca215fb7a8f5d7101f8622f5 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 13 Nov 2023 16:07:01 +0800 Subject: [PATCH 3/6] some minor fix --- .../src/hqps/hqps_path_expand_builder.h | 10 +++- .../hqps_db/core/operator/path_expand.h | 56 +++++++++---------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/flex/codegen/src/hqps/hqps_path_expand_builder.h b/flex/codegen/src/hqps/hqps_path_expand_builder.h index 04a26f8d8104..f17d505aecef 100644 --- a/flex/codegen/src/hqps/hqps_path_expand_builder.h +++ b/flex/codegen/src/hqps/hqps_path_expand_builder.h @@ -117,7 +117,9 @@ class PathExpandOpBuilder { throw std::runtime_error("Expect edge graph type"); } auto& edge_type = act_graph_type.graph_data_type(); - CHECK(edge_type.size() >= 1) << "Expect at least one edge type"; + if (edge_type.size() == 0) { + throw std::runtime_error("Expect edge type size > 0"); + } std::vector src_labels, dst_labels; for (auto i = 0; i < edge_type.size(); ++i) { auto& edge_type_i = edge_type[i]; @@ -141,9 +143,11 @@ class PathExpandOpBuilder { std::sort(dst_labels.begin(), dst_labels.end()); dst_labels.erase(std::unique(dst_labels.begin(), dst_labels.end()), dst_labels.end()); - CHECK(src_labels.size() == dst_labels.size()); for (auto i = 0; i < src_labels.size(); ++i) { - CHECK(src_labels[i] == dst_labels[i]); + if (src_labels[i] != dst_labels[i]) { + throw std::runtime_error( + "Expect src_label == dst_label for both direction"); + } dst_vertex_labels_.emplace_back(dst_labels[i]); } } else if (direction_ == internal::Direction::kOut) { diff --git a/flex/engines/hqps_db/core/operator/path_expand.h b/flex/engines/hqps_db/core/operator/path_expand.h index 3ea15daf4428..e55a400f0712 100644 --- a/flex/engines/hqps_db/core/operator/path_expand.h +++ b/flex/engines/hqps_db/core/operator/path_expand.h @@ -472,18 +472,19 @@ class PathExpand { graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, edge_opt.direction_, vertices_vec, src_labels_vec, range); auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); - CHECK(res_offsets.size() == vertices_vec.size() + 1); - CHECK(res_vertices.size() == res_labels_vec.size()); std::vector res_bitsets; std::vector label_id_vec; std::tie(res_bitsets, label_id_vec) = convert_label_id_vec_to_bitsets(res_labels_vec); - CHECK(label_id_vec.size() <= vertex_other_labels.size()) - << "label_id_vec.size(): " << label_id_vec.size() - << ", vertex_other_labels.size(): " << vertex_other_labels.size(); - CHECK(res_bitsets.size() == label_id_vec.size()) - << "res_bitsets.size(): " << res_bitsets.size() - << ", label_id_vec.size(): " << label_id_vec.size(); + if (label_id_vec.size() > vertex_other_labels.size()) { + LOG(ERROR) << "Error state: label_id_vec.size(): " << label_id_vec.size() + << ", vertex_other_labels.size(): " + << vertex_other_labels.size(); + } + if (res_bitsets.size() != label_id_vec.size()) { + LOG(ERROR) << "Error state: res_bitsets.size(): " << res_bitsets.size() + << ", label_id_vec.size(): " << label_id_vec.size(); + } auto set = make_general_set( std::move(res_vertices), std::move(res_dist_tuple), {"dist"}, std::move(label_id_vec), std::move(res_bitsets)); @@ -516,18 +517,19 @@ class PathExpand { graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, edge_opt.direction_, vertices_vec, src_labels_vec, range); auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); - CHECK(res_offsets.size() == vertices_vec.size() + 1); - CHECK(res_vertices.size() == res_labels_vec.size()); std::vector res_bitsets; std::vector label_id_vec; std::tie(res_bitsets, label_id_vec) = convert_label_id_vec_to_bitsets(res_labels_vec); - CHECK(label_id_vec.size() <= vertex_other_labels.size()) - << "label_id_vec.size(): " << label_id_vec.size() - << ", vertex_other_labels.size(): " << vertex_other_labels.size(); - CHECK(res_bitsets.size() == label_id_vec.size()) - << "res_bitsets.size(): " << res_bitsets.size() - << ", label_id_vec.size(): " << label_id_vec.size(); + if (label_id_vec.size() > vertex_other_labels.size()) { + LOG(ERROR) << "Error state: label_id_vec.size(): " << label_id_vec.size() + << ", vertex_other_labels.size(): " + << vertex_other_labels.size(); + } + if (res_bitsets.size() != label_id_vec.size()) { + LOG(ERROR) << "Error state: res_bitsets.size(): " << res_bitsets.size() + << ", label_id_vec.size(): " << label_id_vec.size(); + } auto set = make_general_set( std::move(res_vertices), std::move(res_dist_tuple), {"dist"}, std::move(label_id_vec), std::move(res_bitsets)); @@ -559,18 +561,19 @@ class PathExpand { graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, edge_opt.direction_, vertices_vec, src_labels_vec, range); auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); - CHECK(res_offsets.size() == vertices_vec.size() + 1); - CHECK(res_vertices.size() == res_labels_vec.size()); std::vector res_bitsets; std::vector label_id_vec; std::tie(res_bitsets, label_id_vec) = convert_label_id_vec_to_bitsets(res_labels_vec); - CHECK(label_id_vec.size() <= vertex_other_labels.size()) - << "label_id_vec.size(): " << label_id_vec.size() - << ", vertex_other_labels.size(): " << vertex_other_labels.size(); - CHECK(res_bitsets.size() == label_id_vec.size()) - << "res_bitsets.size(): " << res_bitsets.size() - << ", label_id_vec.size(): " << label_id_vec.size(); + if (label_id_vec.size() > vertex_other_labels.size()) { + LOG(ERROR) << "Error state: label_id_vec.size(): " << label_id_vec.size() + << ", vertex_other_labels.size(): " + << vertex_other_labels.size(); + } + if (res_bitsets.size() != label_id_vec.size()) { + LOG(ERROR) << "Error state: res_bitsets.size(): " << res_bitsets.size() + << ", label_id_vec.size(): " << label_id_vec.size(); + } auto set = make_general_set( std::move(res_vertices), std::move(res_dist_tuple), {"dist"}, std::move(label_id_vec), std::move(res_bitsets)); @@ -804,7 +807,6 @@ class PathExpand { } tmp_cur_offset.emplace_back(cur_cnt); } - CHECK(cur_cnt == cur_hop_new_vnum); for (auto i = 0; i < other_offsets[cur_hop - 1].size(); ++i) { other_offsets[cur_hop].emplace_back( tmp_cur_offset[other_offsets[cur_hop - 1][i]]); @@ -818,8 +820,6 @@ class PathExpand { std::vector res_labels_vec; std::vector valid_labels(sizeof(label_id_t) * 8, false); for (auto& v_label : vertex_other_labels) { - CHECK(v_label < sizeof(label_id_t) * 8) - << "v_label: " << v_label << ", " << sizeof(label_id_t) * 8; valid_labels[v_label] = true; } auto num_valid_labels = @@ -879,10 +879,8 @@ class PathExpand { for (auto i = 0; i < label_vec.size(); ++i) { auto index = label_to_index[label_vec[i]]; - CHECK(index != -1); res_bitsets[index].set_bit(i); } - CHECK(res_label_id_vec.size() == num_valid_labels); return std::make_pair(std::move(res_bitsets), std::move(res_label_id_vec)); } From d1ce8ab28f8f9ec6fbc59de0bfd002833740b888 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Tue, 14 Nov 2023 19:21:31 +0800 Subject: [PATCH 4/6] support multiple edge triplet --- .github/workflows/hqps-db-ci.yml | 2 +- .../src/hqps/hqps_edge_expand_builder.h | 57 +++- .../src/hqps/hqps_path_expand_builder.h | 26 +- .../hqps_db/core/operator/path_expand.h | 316 ++++++++++++------ flex/engines/hqps_db/core/operator/sink.h | 14 +- flex/engines/hqps_db/core/params.h | 82 +++++ flex/engines/hqps_db/core/sync_engine.h | 25 ++ .../multi_vertex_set/general_vertex_set.h | 2 + .../queries/ic/adhoc/simple_match_12.cypher | 1 + flex/tests/hqps/match_query.h | 77 +++++ flex/tests/hqps/query_test.cc | 14 + .../suite/simple/SimpleMatchQueries.java | 14 + .../integration/ldbc/SimpleMatchTest.java | 7 + 13 files changed, 514 insertions(+), 123 deletions(-) create mode 100644 flex/resources/queries/ic/adhoc/simple_match_12.cypher diff --git a/.github/workflows/hqps-db-ci.yml b/.github/workflows/hqps-db-ci.yml index 91f128bcc33d..e9a36dca3845 100644 --- a/.github/workflows/hqps-db-ci.yml +++ b/.github/workflows/hqps-db-ci.yml @@ -122,7 +122,7 @@ jobs: eval ${cmd} done - for i in 1 2 3 4 5 6 7 8 9 10 11; + for i in 1 2 3 4 5 6 7 8 9 10 11 12; do cmd="./load_plan_and_gen.sh -e=hqps -i=../resources/queries/ic/adhoc/simple_match_${i}.cypher -w=/tmp/codgen/" cmd=${cmd}" -o=/tmp/plugin --ir_conf=${GS_TEST_DIR}/flex/ldbc-sf01-long-date/engine_config.yaml " diff --git a/flex/codegen/src/hqps/hqps_edge_expand_builder.h b/flex/codegen/src/hqps/hqps_edge_expand_builder.h index e11ff3e9ef0a..2c05e7498fbb 100644 --- a/flex/codegen/src/hqps/hqps_edge_expand_builder.h +++ b/flex/codegen/src/hqps/hqps_edge_expand_builder.h @@ -37,11 +37,16 @@ static constexpr const char* EDGE_EXPAND_V_OPT_FILTER_TEMPLATE_STR = static constexpr const char* EDGE_EXPAND_V_OPT_NO_FILTER_TEMPLATE_STR = "auto %1% = gs::make_edge_expandv_opt(%2%, %3%, %4%);\n"; -// This opt can be used by both edge expande and edge expandv. +// This opt can only be used by both edge expande, with multiple edge triplet. static constexpr const char* EDGE_EXPAND_E_OPT_MULTI_EDGE_NO_FILTER_TEMPLATE_STR = "auto %1% = gs::make_edge_expand_multie_opt<%2%>(%3%, %4%, %5%);\n"; +// This opt can only be used by both edge expandv, with multiplet edge triplet, +static constexpr const char* + EDGE_EXPAND_V_OPT_MULTI_EDGE_NO_FILTER_TEMPLATE_STR = + "auto %1% = gs::make_edge_expand_multiv_opt(%2%, %3%);\n"; + // Expand to Edges with Filter. // propNames, direction, edge_label, vertex_label, filter static constexpr const char* EDGE_EXPAND_E_OPT_FILTER_TEMPLATE_STR = @@ -122,6 +127,28 @@ std::string edge_label_triplet_to_array_str( return ss.str(); } +std::string edge_label_triplet_to_vector_str( + const std::vector>& edge_label_triplet) { + std::stringstream ss; + ss << "std::vector>{"; + for (int i = 0; i < edge_label_triplet.size(); ++i) { + ss << "std::array{"; + CHECK(edge_label_triplet[i].size() == 3); + for (int j = 0; j < edge_label_triplet[i].size(); ++j) { + ss << edge_label_triplet[i][j]; + if (j != edge_label_triplet[i].size() - 1) { + ss << ", "; + } + } + ss << "}"; + if (i != edge_label_triplet.size() - 1) { + ss << ", "; + } + } + ss << "}"; + return ss.str(); +} + std::string make_prop_tuple_array(const std::vector& prop_names, const std::vector& prop_types) { std::stringstream ss; @@ -364,22 +391,38 @@ static std::pair BuildMultiLabelEdgeExpandOpt( parse_prop_names_and_prop_types_from_ir_data_type(meta_data.type()); CHECK(prop_names.size() == prop_types.size()); + LOG(INFO) << "meta data: " << meta_data.DebugString(); std::vector> edge_label_triplet = parse_edge_label_triplet_from_ir_data_type(meta_data.type()); CHECK(edge_label_triplet.size() == prop_names.size()); LOG(INFO) << "Find multiple edge triplet: " << edge_label_triplet.size(); auto func_template_str = make_edge_expand_e_func_template_str(prop_types); - auto edge_triplet_2d_array = - edge_label_triplet_to_array_str(edge_label_triplet); auto edge_named_prop_array = make_prop_tuple_array_tuple(prop_names, prop_types); - boost::format formater(EDGE_EXPAND_E_OPT_MULTI_EDGE_NO_FILTER_TEMPLATE_STR); + boost::format formater; + if (expand_opt == + physical::EdgeExpand::ExpandOpt::EdgeExpand_ExpandOpt_EDGE) { + auto edge_triplet_2d_array = + edge_label_triplet_to_array_str(edge_label_triplet); + formater = + boost::format(EDGE_EXPAND_E_OPT_MULTI_EDGE_NO_FILTER_TEMPLATE_STR); + formater % opt_var_name % func_template_str % + gs::direction_pb_to_str(direction) % edge_triplet_2d_array % + edge_named_prop_array; + } else if (expand_opt == + physical::EdgeExpand::ExpandOpt::EdgeExpand_ExpandOpt_VERTEX) { + auto edge_triplet_2d_vector = + edge_label_triplet_to_vector_str(edge_label_triplet); + formater = + boost::format(EDGE_EXPAND_V_OPT_MULTI_EDGE_NO_FILTER_TEMPLATE_STR); + formater % opt_var_name % gs::direction_pb_to_str(direction) % + edge_triplet_2d_vector; + } else { + throw std::runtime_error("Unknown expand opt"); + } - formater % opt_var_name % func_template_str % - gs::direction_pb_to_str(direction) % edge_triplet_2d_array % - edge_named_prop_array; return std::make_pair(opt_var_name, formater.str()); } diff --git a/flex/codegen/src/hqps/hqps_path_expand_builder.h b/flex/codegen/src/hqps/hqps_path_expand_builder.h index f17d505aecef..1894c95a6a65 100644 --- a/flex/codegen/src/hqps/hqps_path_expand_builder.h +++ b/flex/codegen/src/hqps/hqps_path_expand_builder.h @@ -196,12 +196,26 @@ class PathExpandOpBuilder { auto expand_opt = edge_expand_pb.expand_opt(); CHECK(dst_vertex_labels_.size() > 0) << "no dst lables found"; - physical::PhysicalOpr::MetaData meta_data; - // pass an empty meta_data, since we need no meta_data for - // edge_expand_opt. - std::tie(edge_expand_opt_name_, edge_expand_opt_) = - BuildOneLabelEdgeExpandOpt(ctx_, direction_, params, - dst_vertex_labels_, expand_opt, meta_data); + if (params.tables().size() < 1) { + throw std::runtime_error("no edge labels found"); + } else if (params.tables().size() == 1) { + physical::PhysicalOpr::MetaData meta_data; + // pass an empty meta_data, since we need no meta_data for + std::tie(edge_expand_opt_name_, edge_expand_opt_) = + BuildOneLabelEdgeExpandOpt(ctx_, direction_, params, + dst_vertex_labels_, expand_opt, + meta_data); + } else { + // get the first meta_data + if (meta_data_pb.size() < 1) { + throw std::runtime_error("no meta_data found"); + } + auto& meta_data = meta_data_pb[0]; + std::tie(edge_expand_opt_name_, edge_expand_opt_) = + BuildMultiLabelEdgeExpandOpt(ctx_, direction_, params, expand_opt, + meta_data); + } + VLOG(10) << "edge_expand_opt_name_: " << edge_expand_opt_name_; VLOG(10) << "edge_expand_opt_: " << edge_expand_opt_; } diff --git a/flex/engines/hqps_db/core/operator/path_expand.h b/flex/engines/hqps_db/core/operator/path_expand.h index e55a400f0712..0aad0c3b72e7 100644 --- a/flex/engines/hqps_db/core/operator/path_expand.h +++ b/flex/engines/hqps_db/core/operator/path_expand.h @@ -245,12 +245,12 @@ class PathExpand { auto& range = path_expand_opt.range_; auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; auto& get_v_opt = path_expand_opt.get_v_opt_; - auto& bitset = vertex_set.GetBitset(); auto src_label_vec = vertex_set.GetLabelVec(); + auto src_label_set = array_to_vec(vertex_set.GetLabels()); auto src_label_id_vec = label_key_vec_2_label_id_vec(src_label_vec); - return PathExpandMultiDstVFromTwoLabelSet(graph, vertex_set.GetVertices(), - src_label_id_vec, bitset, range, - edge_expand_opt, get_v_opt); + return PathExpandMultiDstVFromGeneralSet(graph, vertex_set.GetVertices(), + src_label_set, src_label_id_vec, + range, edge_expand_opt, get_v_opt); } // PathExpandV with multiple dst labels, for general vertex set as input, @@ -270,11 +270,96 @@ class PathExpand { auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; auto& get_v_opt = path_expand_opt.get_v_opt_; auto& bitsets = vertex_set.GetBitsets(); + auto src_label_set = vertex_set.GetLabels(); auto src_label_vec = vertex_set.GetLabelVec(); auto src_label_id_vec = label_key_vec_2_label_id_vec(src_label_vec); return PathExpandMultiDstVFromGeneralSet(graph, vertex_set.GetVertices(), - src_label_id_vec, bitsets, range, - edge_expand_opt, get_v_opt); + src_label_set, src_label_id_vec, + range, edge_expand_opt, get_v_opt); + } + + // PathExpandV with multiple edge triplet as input, output no properties. + template < + typename VERTEX_SET_T, typename LabelT, typename VERTEX_FILTER_T, + typename EDGE_FILTER_T, size_t get_v_num_labels, + typename RES_SET_T = GeneralVertexSet, + typename RES_T = std::pair>> + static RES_T PathExpandVMultiTriplet( + const GRAPH_INTERFACE& graph, const VERTEX_SET_T& vertex_set, + PathExpandVMultiTripletOpt&& path_expand_opt) { + auto& range = path_expand_opt.range_; + auto& edge_expand_opt = path_expand_opt.edge_expand_opt_; + auto& get_v_opt = path_expand_opt.get_v_opt_; + auto& edge_triplets = edge_expand_opt.edge_label_triplets_; + auto& vertex_other_labels = get_v_opt.v_labels_; + auto vertex_other_labels_vec = array_to_vec(vertex_other_labels); + + std::vector res_vertices; + std::vector res_offsets; + std::vector res_dists; + std::vector res_labels_vec; + if constexpr (VERTEX_SET_T::is_row_vertex_set) { + auto src_label = vertex_set.GetLabel(); + auto& vertices_vec = vertex_set.GetVertices(); + std::vector src_labels_vec(vertices_vec.size(), + vertex_set.GetLabel()); + std::vector src_labels_set{src_label}; + // expand from row_vertex_set; + std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = + path_expandv_multi_triplet(graph, edge_triplets, + vertex_other_labels_vec, + edge_expand_opt.direction_, vertices_vec, + src_labels_set, src_labels_vec, range); + + } else if constexpr (VERTEX_SET_T::is_two_label_set) { + // expand from two_label_vertex_set; + auto& vertices_vec = vertex_set.GetVertices(); + auto src_label_vec = vertex_set.GetLabelVec(); + auto src_label_set = array_to_vec(vertex_set.GetLabels()); + auto src_label_id_vec = label_key_vec_2_label_id_vec(src_label_vec); + std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = + path_expandv_multi_triplet(graph, edge_triplets, + vertex_other_labels_vec, + edge_expand_opt.direction_, vertices_vec, + src_label_set, src_label_id_vec, range); + + } else if constexpr (VERTEX_SET_T::is_general_set) { + // expand from general_vertex_set; + auto src_label_set = vertex_set.GetLabels(); + auto src_label_vec = vertex_set.GetLabelVec(); + auto src_label_id_vec = label_key_vec_2_label_id_vec(src_label_vec); + std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = + path_expandv_multi_triplet( + graph, edge_triplets, vertex_other_labels_vec, + edge_expand_opt.direction_, vertex_set.GetVertices(), + src_label_set, src_label_id_vec, range); + } else { + // fail at compile time. + static_assert(VERTEX_SET_T::is_row_vertex_set || + VERTEX_SET_T::is_two_label_set || + VERTEX_SET_T::is_general_set, + "Unsupported vertex set type"); + } + + auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); + std::vector res_bitsets; + std::vector label_id_vec; + std::tie(res_bitsets, label_id_vec) = + convert_label_id_vec_to_bitsets(res_labels_vec); + if (label_id_vec.size() > vertex_other_labels.size()) { + LOG(ERROR) << "Error state: label_id_vec.size(): " << label_id_vec.size() + << ", vertex_other_labels.size(): " + << vertex_other_labels.size(); + } + if (res_bitsets.size() != label_id_vec.size()) { + LOG(ERROR) << "Error state: res_bitsets.size(): " << res_bitsets.size() + << ", label_id_vec.size(): " << label_id_vec.size(); + } + auto set = make_general_set( + std::move(res_vertices), std::move(res_dist_tuple), {"dist"}, + std::move(label_id_vec), std::move(res_bitsets)); + return std::make_pair(std::move(set), std::move(res_offsets)); } template @@ -467,10 +552,24 @@ class PathExpand { std::vector res_dists; std::vector res_labels_vec; std::vector src_labels_vec(vertices_vec.size(), src_label); + std::vector src_labels_set{src_label}; + std::vector> edge_triplets; + // Since other_labels is extracted when code generating, we reverse back. + if (edge_opt.direction_ == Direction::Out) { + for (auto& other_label : edge_other_labels) { + edge_triplets.emplace_back( + std::array{src_label, other_label, edge_label}); + } + } else { // In and both. + for (auto& other_label : edge_other_labels) { + edge_triplets.emplace_back( + std::array{other_label, src_label, edge_label}); + } + } std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = - path_expandv_from_src_label_with_multi_dst( - graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, - edge_opt.direction_, vertices_vec, src_labels_vec, range); + path_expandv_multi_triplet( + graph, edge_triplets, vertex_other_labels_vec, edge_opt.direction_, + vertices_vec, src_labels_set, src_labels_vec, range); auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); std::vector res_bitsets; std::vector label_id_vec; @@ -493,57 +592,13 @@ class PathExpand { // Expand V from two label vertices. // Collect multiple dst label vertices. - template - static auto PathExpandMultiDstVFromTwoLabelSet( - const GRAPH_INTERFACE& graph, - const std::vector& vertices_vec, - const std::vector& src_labels_vec, - const grape::Bitset& bitset, const Range& range, - const EdgeExpandOptMultiLabel& - edge_opt, - const GetVOpt& get_vopt) { - auto& edge_other_labels = edge_opt.other_labels_; - auto& vertex_other_labels = get_vopt.v_labels_; - auto edge_other_labels_vec = array_to_vec(edge_other_labels); - auto vertex_other_labels_vec = array_to_vec(vertex_other_labels); - auto edge_label = edge_opt.edge_label_; - std::vector res_vertices; - std::vector res_offsets; - std::vector res_dists; - std::vector res_labels_vec; - std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = - path_expandv_from_src_label_with_multi_dst( - graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, - edge_opt.direction_, vertices_vec, src_labels_vec, range); - auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); - std::vector res_bitsets; - std::vector label_id_vec; - std::tie(res_bitsets, label_id_vec) = - convert_label_id_vec_to_bitsets(res_labels_vec); - if (label_id_vec.size() > vertex_other_labels.size()) { - LOG(ERROR) << "Error state: label_id_vec.size(): " << label_id_vec.size() - << ", vertex_other_labels.size(): " - << vertex_other_labels.size(); - } - if (res_bitsets.size() != label_id_vec.size()) { - LOG(ERROR) << "Error state: res_bitsets.size(): " << res_bitsets.size() - << ", label_id_vec.size(): " << label_id_vec.size(); - } - auto set = make_general_set( - std::move(res_vertices), std::move(res_dist_tuple), {"dist"}, - std::move(label_id_vec), std::move(res_bitsets)); - return std::make_pair(std::move(set), std::move(res_offsets)); - } - - // PathExpandMultiDstVFromGeneralSet template static auto PathExpandMultiDstVFromGeneralSet( const GRAPH_INTERFACE& graph, const std::vector& vertices_vec, - const std::vector& src_labels_vec, - const std::vector& bitsets, const Range& range, + const std::vector& src_labels_set, + const std::vector& src_labels_vec, const Range& range, const EdgeExpandOptMultiLabel& edge_opt, const GetVOpt& get_vopt) { @@ -556,10 +611,26 @@ class PathExpand { std::vector res_offsets; std::vector res_dists; std::vector res_labels_vec; + std::vector> edge_triplets; + if (edge_opt.direction_ == Direction::Out) { + for (auto& src_label : src_labels_set) { + for (auto& other_label : edge_other_labels_vec) { + edge_triplets.emplace_back( + std::array{src_label, other_label, edge_label}); + } + } + } else { + for (auto& src_label : src_labels_set) { + for (auto& other_label : edge_other_labels_vec) { + edge_triplets.emplace_back( + std::array{other_label, src_label, edge_label}); + } + } + } std::tie(res_vertices, res_dists, res_labels_vec, res_offsets) = - path_expandv_from_src_label_with_multi_dst( - graph, edge_label, edge_other_labels_vec, vertex_other_labels_vec, - edge_opt.direction_, vertices_vec, src_labels_vec, range); + path_expandv_multi_triplet( + graph, edge_triplets, vertex_other_labels_vec, edge_opt.direction_, + vertices_vec, src_labels_set, src_labels_vec, range); auto res_dist_tuple = single_col_vec_to_tuple_vec(std::move(res_dists)); std::vector res_bitsets; std::vector label_id_vec; @@ -667,16 +738,24 @@ class PathExpand { return std::make_pair(std::move(path_set), std::move(ctx_offsets)); } - // expand from single src label to multiple dst labels. - static auto path_expandv_from_src_label_with_multi_dst( - const GRAPH_INTERFACE& graph, label_id_t edge_label, - const std::vector& other_labels, - const std::vector& - vertex_other_labels, // vertex_other_labels is used to filter - // vertices with other labels. - const Direction& direction, const std::vector& vertices_vec, + // expand from vertices, with multiple edge triplets. + // The intermediate vertices can also have multiple labels, and expand with + // multiple edge triplet. + static auto path_expandv_multi_triplet( + const GRAPH_INTERFACE& graph, + const std::vector>& + edge_label_triplets, // src, dst, edge + const std::vector& get_v_labels, const Direction& direction, + const std::vector& vertices_vec, + const std::vector& src_labels_set, const std::vector& src_v_labels_vec, const Range& range) { // (range, other_label_ind, vertices) + LOG(INFO) << "PathExpandV with multiple edge triplets: " + << gs::to_string(edge_label_triplets) + << ", direction: " << gs::to_string(direction) + << ", vertices size: " << vertices_vec.size() + << ", src_labels_set: " << gs::to_string(src_labels_set) + << ", range: " << range.start_ << ", " << range.limit_; std::vector> other_vertices; std::vector> other_labels_vec; std::vector> other_offsets; @@ -697,17 +776,21 @@ class PathExpand { // src_vertex_labels and other_labels. std::vector src_label_candidates; { - src_label_candidates.insert(src_label_candidates.end(), - src_v_labels_vec.begin(), - src_v_labels_vec.end()); - src_label_candidates.insert(src_label_candidates.end(), - other_labels.begin(), other_labels.end()); + // insert src and dst labels in edge_label_triplets to + // src_label_candidates + for (auto& edge_label_triplet : edge_label_triplets) { + auto src_label = edge_label_triplet[0]; + auto dst_label = edge_label_triplet[1]; + src_label_candidates.emplace_back(src_label); + src_label_candidates.emplace_back(dst_label); + } std::sort(src_label_candidates.begin(), src_label_candidates.end()); // dedup auto last = std::unique(src_label_candidates.begin(), src_label_candidates.end()); src_label_candidates.erase(last, src_label_candidates.end()); } + VLOG(10) << "src_label_candidates: " << gs::to_string(src_label_candidates); // iterate for all hops for (auto cur_hop = 1; cur_hop < range.limit_; ++cur_hop) { using nbr_list_type = @@ -715,37 +798,63 @@ class PathExpand { std::vector> nbr_lists; nbr_lists.resize(other_vertices[cur_hop - 1].size()); std::vector indicator(other_vertices[cur_hop - 1].size(), false); - // std::vector other_vertex_labels; - // other_vertex_labels.resize(other_vertices[cur_hop - 1].size()); + for (auto& src_other_label : src_label_candidates) { - std::vector indices; - - std::vector other_vertices_for_cur_label; - std::tie(other_vertices_for_cur_label, indices) = - get_vertices_with_label(other_vertices[cur_hop - 1], - other_labels_vec[cur_hop - 1], - src_other_label); - if (indices.size() > 0) { - VLOG(10) << "Get vertices with label: " - << std::to_string(src_other_label) << ", " - << other_vertices_for_cur_label.size(); - - for (auto other_label_ind = 0; other_label_ind < other_labels.size(); - ++other_label_ind) { - auto other_label = other_labels[other_label_ind]; + // for each kind of src vertices, try each edge triplet. + label_id_t dst_other_label; + for (auto& edge_triplet : edge_label_triplets) { + if (direction == Direction::In) { + if (src_other_label != edge_triplet[1]) { + continue; + } else { + dst_other_label = edge_triplet[0]; + } + } else if (direction == Direction::Out) { + if (src_other_label != edge_triplet[0]) { + continue; + } else { + dst_other_label = edge_triplet[1]; + } + } else { + // both + if (src_other_label != edge_triplet[0] && + src_other_label != edge_triplet[1]) { + continue; + } else { + if (src_other_label == edge_triplet[0]) { + dst_other_label = edge_triplet[1]; + } else { + dst_other_label = edge_triplet[0]; + } + } + } + auto cur_edge_label = edge_triplet[2]; + + std::vector indices; + + std::vector other_vertices_for_cur_label; + std::tie(other_vertices_for_cur_label, indices) = + get_vertices_with_label(other_vertices[cur_hop - 1], + other_labels_vec[cur_hop - 1], + src_other_label); + if (indices.size() > 0) { + VLOG(10) << "Get vertices with label: " + << std::to_string(src_other_label) << ", " + << other_vertices_for_cur_label.size(); + label_id_t real_src_label, real_dst_label; if (direction == Direction::Out) { real_src_label = src_other_label; - real_dst_label = other_label; + real_dst_label = dst_other_label; } else { // in or both. - real_src_label = other_label; + real_src_label = dst_other_label; real_dst_label = src_other_label; } - auto cur_nbr_list = - graph.GetOtherVertices(real_src_label, real_dst_label, - edge_label, other_vertices_for_cur_label, - gs::to_string(direction), INT_MAX); + auto cur_nbr_list = graph.GetOtherVertices( + real_src_label, real_dst_label, cur_edge_label, + other_vertices_for_cur_label, gs::to_string(direction), + INT_MAX); { size_t tmp_sum = 0; for (auto i = 0; i < cur_nbr_list.size(); ++i) { @@ -755,20 +864,23 @@ class PathExpand { << ", nbr size: " << tmp_sum << ", from: " << std::to_string(real_src_label) << ", to: " << std::to_string(real_dst_label) - << ", edge_label: " << std::to_string(edge_label) + << ", dst other_label: " + << std::to_string(dst_other_label) + << ", edge_label: " << std::to_string(cur_edge_label) << ", direction: " << gs::to_string(direction); } for (auto i = 0; i < indices.size(); ++i) { auto index = indices[i]; nbr_lists[index].emplace_back(cur_nbr_list.get_vector(i), - other_label); + dst_other_label); indicator[index] = true; } + + } else { + VLOG(10) << "No vertices with label: " + << std::to_string(src_other_label); } - } else { - VLOG(10) << "No vertices with label: " - << std::to_string(src_other_label); } } // extract vertices from nbrs, and add them to other_vertices[cur_hop] @@ -819,19 +931,19 @@ class PathExpand { std::vector res_dists; std::vector res_labels_vec; std::vector valid_labels(sizeof(label_id_t) * 8, false); - for (auto& v_label : vertex_other_labels) { + for (auto& v_label : get_v_labels) { valid_labels[v_label] = true; } auto num_valid_labels = std::accumulate(valid_labels.begin(), valid_labels.end(), 0); VLOG(10) << "Select vertices within " << num_valid_labels - << " valid labels, from " << other_labels.size(); + << " valid labels, from " << get_v_labels.size(); size_t flat_size = 0; for (auto i = range.start_; i < range.limit_; ++i) { flat_size += other_vertices[i].size(); } - VLOG(10) << "PathExpandV from single label, flat size: " << flat_size; + VLOG(10) << "PathExpandV with multiple triplet flat size: " << flat_size; res_vertices.reserve(flat_size); res_dists.reserve(flat_size); res_labels_vec.reserve(flat_size); diff --git a/flex/engines/hqps_db/core/operator/sink.h b/flex/engines/hqps_db/core/operator/sink.h index 23485a87f6f8..9181d6f07923 100644 --- a/flex/engines/hqps_db/core/operator/sink.h +++ b/flex/engines/hqps_db/core/operator/sink.h @@ -745,7 +745,7 @@ class SinkOp { auto& schema = graph.schema(); auto vertices_vec = vertex_set.GetVertices(); auto labels_vec = vertex_set.GetLabels(); - auto bitsets = vertex_set.GetBitsets(); + auto& bitsets = vertex_set.GetBitsets(); CHECK(vertices_vec.size() == labels_vec.size()); std::vector> prop_names; for (auto i = 0; i < labels_vec.size(); ++i) { @@ -791,9 +791,9 @@ class SinkOp { } mutable_vertex->mutable_label()->set_id(label); // label must be set - auto column_ptrs = column_ptrs[label]; - for (auto j = 0; j < column_ptrs.size(); ++j) { - auto& column_ptr = column_ptrs[j]; + auto cur_column_ptr = column_ptrs[label]; + for (auto j = 0; j < cur_column_ptr.size(); ++j) { + auto& column_ptr = cur_column_ptr[j]; // Only set non-none properties. if (column_ptr) { auto new_prop = mutable_vertex->add_properties(); @@ -823,9 +823,9 @@ class SinkOp { } mutable_vertex->mutable_label()->set_id(label); // label must be set - auto column_ptrs = column_ptrs[label]; - for (auto j = 0; j < column_ptrs.size(); ++j) { - auto& column_ptr = column_ptrs[j]; + auto cur_column_ptr = column_ptrs[label]; + for (auto j = 0; j < cur_column_ptr.size(); ++j) { + auto& column_ptr = cur_column_ptr[j]; // Only set non-none properties. if (column_ptr) { auto new_prop = mutable_vertex->add_properties(); diff --git a/flex/engines/hqps_db/core/params.h b/flex/engines/hqps_db/core/params.h index 51e7bf4c2627..b6bd8136ed5e 100644 --- a/flex/engines/hqps_db/core/params.h +++ b/flex/engines/hqps_db/core/params.h @@ -442,6 +442,22 @@ struct EdgeExpandOpt { Filter edge_filter_; }; +// EdgeExpand to vertices with multiple edge triplet. +// The edge triplet are in the form of +template +struct EdgeExpandVMultiTripletOpt { + EdgeExpandVMultiTripletOpt( + Direction dir, std::vector>&& edge_label_triplets, + EDGE_FILTER_FUNC&& edge_filter) + : direction_(dir), + edge_label_triplets_(std::move(edge_label_triplets)), + edge_filter_(std::move(edge_filter)) {} + + Direction direction_; + std::vector> edge_label_triplets_; + EDGE_FILTER_FUNC edge_filter_; +}; + template struct EdgeExpandEOpt; @@ -581,6 +597,23 @@ auto make_edge_expand_multie_opt( Filter()); } +// Expand with multiple edge triplet pairs. resulting vertices, prop names and +// prop types are not needed. +template +auto make_edge_expand_multiv_opt( + Direction dir, std::vector>&& edge_label_triplets, + FILTER_T&& func) { + return EdgeExpandVMultiTripletOpt( + dir, std::move(edge_label_triplets), std::move(func)); +} + +template +auto make_edge_expand_multiv_opt( + Direction dir, std::vector>&& edge_label_triplets) { + return EdgeExpandVMultiTripletOpt>( + dir, std::move(edge_label_triplets), Filter()); +} + // For edge expand with multiple labels. template struct EdgeExpandOptMultiLabel { @@ -765,6 +798,32 @@ struct PathExpandMultiDstOptImpl { ResultOpt result_opt_; // Get all vertices on Path or only ending vertices. }; +// Path expandv with multiple edge triplets. The src vertices can also contain +// many labels +template +struct PathExpandVMultiTripletOptImpl { + PathExpandVMultiTripletOptImpl( + EdgeExpandVMultiTripletOpt&& edge_expand_opt, + GetVOpt&& get_v_opt, + Range&& range, UNTIL_CONDITION&& until_condition, + PathOpt path_opt = PathOpt::Arbitrary, + ResultOpt result_opt = ResultOpt::EndV) + : edge_expand_opt_(std::move(edge_expand_opt)), + get_v_opt_(std::move(get_v_opt)), + range_(std::move(range)), + until_condition_(std::move(until_condition)), + path_opt_(path_opt), + result_opt_(result_opt) {} + + EdgeExpandVMultiTripletOpt edge_expand_opt_; + GetVOpt get_v_opt_; + Range range_; // Range for result vertices, default is [0,INT_MAX) + UNTIL_CONDITION until_condition_; + PathOpt path_opt_; // Single path or not. + ResultOpt result_opt_; // Get all vertices on Path or only ending vertices. +}; + template using PathExpandVOpt = PathExpandOptImpl, T...>; +template +using PathExpandVMultiTripletOpt = + PathExpandVMultiTripletOptImpl, + T...>; + template using PathExpandPOpt = PathExpandOptImpl>; @@ -788,6 +854,7 @@ using ShortestPathOpt = PathExpandOptImpl; +// make path expand opt with only one dst label. template auto make_path_expandv_opt( @@ -800,6 +867,7 @@ auto make_path_expandv_opt( Filter(), path_opt, result_opt); } +// make path expand opt with only one edge label, but multiple dst labels. template auto make_path_expandv_opt( @@ -814,6 +882,20 @@ auto make_path_expandv_opt( Filter(), path_opt, result_opt); } +// make path expand opt with multiple edge label triplet. +template +auto make_path_expandv_opt( + EdgeExpandVMultiTripletOpt&& edge_expand_opt, + GetVOpt&& get_v_opt, + Range&& range, PathOpt path_opt = PathOpt::Arbitrary, + ResultOpt result_opt = ResultOpt::EndV) { + return PathExpandVMultiTripletOpt( + std::move(edge_expand_opt), std::move(get_v_opt), std::move(range), + Filter(), path_opt, result_opt); +} + template auto make_path_expandp_opt( EdgeExpandOpt&& edge_expand_opt, diff --git a/flex/engines/hqps_db/core/sync_engine.h b/flex/engines/hqps_db/core/sync_engine.h index ea0d427913a1..a5abc9944232 100644 --- a/flex/engines/hqps_db/core/sync_engine.h +++ b/flex/engines/hqps_db/core/sync_engine.h @@ -449,6 +449,31 @@ class SyncEngine : public BaseEngine { // old context will be abandon here. } + template + static auto PathExpandV( + const GRAPH_INTERFACE& graph, + Context&& ctx, + PathExpandVMultiTripletOpt&& path_expand_opt) { + if (path_expand_opt.path_opt_ != PathOpt::Arbitrary) { + LOG(FATAL) << "Only support Arbitrary path now"; + } + if (path_expand_opt.result_opt_ != ResultOpt::EndV) { + LOG(FATAL) << "Only support EndV now"; + } + auto& select_node = gs::Get(ctx); + auto pair = PathExpand::PathExpandVMultiTriplet( + graph, select_node, std::move(path_expand_opt)); + + // create new context node, update offsets. + return ctx.template AddNode(std::move(pair.first), + std::move(pair.second), alias_to_use); + // old context will be abandon here. + } + /// Expand to Path template && vec, std::vector>&& data_vec, std::vector&& prop_names, @@ -928,6 +929,7 @@ class GeneralVertexSet { static constexpr bool is_general_set = true; static constexpr bool is_collection = false; static constexpr bool is_multi_label = false; + static constexpr bool is_row_vertex_set = false; GeneralVertexSet(std::vector&& vec, std::vector&& label_names, std::vector&& bitsets) : vec_(std::move(vec)), label_names_(std::move(label_names)) { diff --git a/flex/resources/queries/ic/adhoc/simple_match_12.cypher b/flex/resources/queries/ic/adhoc/simple_match_12.cypher new file mode 100644 index 000000000000..1cd3bcf9e4d3 --- /dev/null +++ b/flex/resources/queries/ic/adhoc/simple_match_12.cypher @@ -0,0 +1 @@ +MATCH(a)-[*1..2]->(b) WITH a.id AS aId, b.id AS bId RETURN aId, bId ORDER BY aId ASC, bId ASC LIMIT 10; \ No newline at end of file diff --git a/flex/tests/hqps/match_query.h b/flex/tests/hqps/match_query.h index 9b533bcedf48..b8ee5edbebbf 100644 --- a/flex/tests/hqps/match_query.h +++ b/flex/tests/hqps/match_query.h @@ -869,5 +869,82 @@ class MatchQuery14 : public HqpsAppBase { } }; +class MatchQuery15 : public HqpsAppBase { + public: + using Engine = SyncEngine; + using label_id_t = typename gs::MutableCSRInterface::label_id_t; + using vertex_id_t = typename gs::MutableCSRInterface::vertex_id_t; + // Query function for query class + results::CollectiveResults Query(const gs::MutableCSRInterface& graph) const { + auto expr0 = gs::make_filter(Query0expr0()); + auto ctx0 = Engine::template ScanVertex( + graph, std::array{0, 1, 2, 3, 4, 5, 6, 7}, + std::move(expr0)); + + auto edge_expand_opt1 = gs::make_edge_expand_multiv_opt( + gs::Direction::Out, std::vector>{ + std::array{2, 2, 2}, + std::array{2, 3, 2}, + std::array{1, 7, 6}, + std::array{6, 6, 13}, + std::array{4, 3, 3}, + std::array{2, 0, 7}, + std::array{1, 0, 7}, + std::array{3, 0, 7}, + std::array{5, 0, 7}, + std::array{1, 1, 8}, + std::array{1, 2, 9}, + std::array{1, 3, 9}, + std::array{0, 0, 11}, + std::array{7, 6, 12}, + std::array{2, 1, 0}, + std::array{3, 1, 0}, + std::array{1, 5, 10}, + std::array{4, 1, 4}, + std::array{1, 5, 14}, + std::array{3, 7, 1}, + std::array{4, 1, 5}}); + + auto get_v_opt0 = make_getv_opt( + gs::VOpt::Itself, + std::array{ + (label_id_t) 2, (label_id_t) 3, (label_id_t) 7, (label_id_t) 6, + (label_id_t) 3, (label_id_t) 0, (label_id_t) 0, (label_id_t) 0, + (label_id_t) 0, (label_id_t) 1, (label_id_t) 2, (label_id_t) 3, + (label_id_t) 0, (label_id_t) 6, (label_id_t) 1, (label_id_t) 1, + (label_id_t) 5, (label_id_t) 1, (label_id_t) 5, (label_id_t) 7, + (label_id_t) 1}); + + auto path_opt2 = gs::make_path_expandv_opt( + std::move(edge_expand_opt1), std::move(get_v_opt0), gs::Range(0, 2)); + auto ctx1 = Engine::PathExpandV( + graph, std::move(ctx0), std::move(path_opt2)); + auto ctx2 = Engine::Project( + graph, std::move(ctx1), + std::tuple{gs::make_mapper_with_variable( + gs::PropertySelector("id")), + gs::make_mapper_with_variable( + gs::PropertySelector("id"))}); + auto ctx3 = Engine::Project( + graph, std::move(ctx2), + std::tuple{gs::make_mapper_with_variable( + gs::PropertySelector("")), + gs::make_mapper_with_variable( + gs::PropertySelector(""))}); + auto ctx4 = Engine::Sort( + graph, std::move(ctx3), gs::Range(0, 10), + std::tuple{gs::OrderingPropPair(""), + gs::OrderingPropPair("")}); + return Engine::Sink(graph, ctx4, std::array{2, 3}); + } + // Wrapper query function for query class + results::CollectiveResults Query(const gs::MutableCSRInterface& graph, + Decoder& decoder) const override { + // decoding params from decoder, and call real query func + + return Query(graph); + } +}; + } // namespace gs #endif // TESTS_HQPS_MATCH_QUERY_H_ \ No newline at end of file diff --git a/flex/tests/hqps/query_test.cc b/flex/tests/hqps/query_test.cc index c803d3cdc357..04bfabf5643e 100644 --- a/flex/tests/hqps/query_test.cc +++ b/flex/tests/hqps/query_test.cc @@ -240,5 +240,19 @@ int main(int argc, char** argv) { LOG(INFO) << "Finish MatchQuery14 test"; } + { + // test PathExpand with multiple edge triplets. + gs::MatchQuery15 query; + std::vector encoder_array; + gs::Encoder input_encoder(encoder_array); + std::vector output_array; + gs::Encoder output(output_array); + gs::Decoder input(encoder_array.data(), encoder_array.size()); + + gs::MutableCSRInterface graph(sess); + query.Query(graph, input); + LOG(INFO) << "Finish MatchQuery14 test"; + } + LOG(INFO) << "Finish context test."; } \ No newline at end of file diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java index c447b51a3e9d..b1de127971f4 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java @@ -166,4 +166,18 @@ public static QueryContext get_simple_match_query_12_test() { "Record<{placeId: 0, placeName: \"India\", postOrCommentId: 54974}>"); return new QueryContext(query, expected); } + + public static QueryContext get_simple_match_query_13_test() { + String query = + "MATCH(a)-[*1..2]->(b) WITH a.id AS aId, b.id AS bId RETURN aId, bId ORDER BY aId" + + " ASC, bId ASC LIMIT 5;"; + List expected = + Arrays.asList( + "Record<{aId: 0, bId: 3>", + "Record<{aId: 0, bId: 59>", + "Record<{aId: 0, bId: 349>", + "Record<{aId: 0, bId: 933>", + "Record<{aId: 0, bId: 1454>"); + return new QueryContext(query, expected); + } } diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java index c05c8746c0a1..b8f105eadb9a 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java @@ -121,6 +121,13 @@ public void run_simple_match_12_test() { Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); } + @Test + public void run_simple_match_13_test() { + QueryContext testQuery = SimpleMatchQueries.get_simple_match_query_13_test(); + Result result = session.run(testQuery.getQuery()); + Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); + } + @AfterClass public static void afterClass() { if (session != null) { From 514aa151c75868daddd26ebc5857c48858e49db7 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Tue, 14 Nov 2023 19:53:39 +0800 Subject: [PATCH 5/6] fix CI --- .../integration/suite/simple/SimpleMatchQueries.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java index b1de127971f4..2a5cf5894f9a 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java @@ -173,11 +173,11 @@ public static QueryContext get_simple_match_query_13_test() { + " ASC, bId ASC LIMIT 5;"; List expected = Arrays.asList( - "Record<{aId: 0, bId: 3>", - "Record<{aId: 0, bId: 59>", - "Record<{aId: 0, bId: 349>", - "Record<{aId: 0, bId: 933>", - "Record<{aId: 0, bId: 1454>"); + "Record<{aId: 0, bId: 3}>", + "Record<{aId: 0, bId: 59}>", + "Record<{aId: 0, bId: 349}>", + "Record<{aId: 0, bId: 933}>", + "Record<{aId: 0, bId: 1454}>"); return new QueryContext(query, expected); } } From 17db2635373a411290e36f86fae1d511bf8e56a0 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 15 Nov 2023 11:34:56 +0800 Subject: [PATCH 6/6] remove check --- flex/codegen/src/hqps/hqps_edge_expand_builder.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flex/codegen/src/hqps/hqps_edge_expand_builder.h b/flex/codegen/src/hqps/hqps_edge_expand_builder.h index 2c05e7498fbb..eb1cedd36899 100644 --- a/flex/codegen/src/hqps/hqps_edge_expand_builder.h +++ b/flex/codegen/src/hqps/hqps_edge_expand_builder.h @@ -133,7 +133,9 @@ std::string edge_label_triplet_to_vector_str( ss << "std::vector>{"; for (int i = 0; i < edge_label_triplet.size(); ++i) { ss << "std::array{"; - CHECK(edge_label_triplet[i].size() == 3); + if (edge_label_triplet[i].size() != 3) { + throw std::runtime_error("edge label triplet size must be 3"); + } for (int j = 0; j < edge_label_triplet[i].size(); ++j) { ss << edge_label_triplet[i][j]; if (j != edge_label_triplet[i].size() - 1) { @@ -391,7 +393,6 @@ static std::pair BuildMultiLabelEdgeExpandOpt( parse_prop_names_and_prop_types_from_ir_data_type(meta_data.type()); CHECK(prop_names.size() == prop_types.size()); - LOG(INFO) << "meta data: " << meta_data.DebugString(); std::vector> edge_label_triplet = parse_edge_label_triplet_from_ir_data_type(meta_data.type()); CHECK(edge_label_triplet.size() == prop_names.size());