diff --git a/analytical_engine/core/loader/dynamic_to_arrow_converter.h b/analytical_engine/core/loader/dynamic_to_arrow_converter.h index 5ef5bbf1ca2c..93f0133dd585 100644 --- a/analytical_engine/core/loader/dynamic_to_arrow_converter.h +++ b/analytical_engine/core/loader/dynamic_to_arrow_converter.h @@ -291,6 +291,7 @@ struct COOBuilder {}; template struct COOBuilder { using oid_t = int32_t; + using vid_t = typename DST_FRAG_T::vid_t; using src_fragment_t = DynamicFragment; using dst_fragment_t = DST_FRAG_T; @@ -310,7 +311,7 @@ struct COOBuilder { continue; } auto u_oid = src_frag->GetId(u); - vineyard::property_graph_types::VID_TYPE u_gid; + vid_t u_gid; CHECK(dst_vm->GetGid(fid, 0, u_oid.GetInt(), u_gid)); @@ -320,7 +321,7 @@ struct COOBuilder { continue; } auto v_oid = src_frag->GetId(v); - vineyard::property_graph_types::VID_TYPE v_gid; + vid_t v_gid; CHECK(dst_vm->GetGid(0, v_oid.GetInt(), v_gid)); ARROW_OK_OR_RAISE(src_builder.Append(u_gid)); @@ -331,7 +332,7 @@ struct COOBuilder { auto& v = e.neighbor; if (src_frag->IsOuterVertex(v)) { auto v_oid = src_frag->GetId(v); - vineyard::property_graph_types::VID_TYPE v_gid; + vid_t v_gid; CHECK(dst_vm->GetGid(0, v_oid.GetInt(), v_gid)); ARROW_OK_OR_RAISE(src_builder.Append(v_gid)); @@ -355,6 +356,7 @@ struct COOBuilder { template struct COOBuilder { using oid_t = int64_t; + using vid_t = typename DST_FRAG_T::vid_t; using src_fragment_t = DynamicFragment; using dst_fragment_t = DST_FRAG_T; @@ -374,7 +376,7 @@ struct COOBuilder { continue; } auto u_oid = src_frag->GetId(u); - vineyard::property_graph_types::VID_TYPE u_gid; + vid_t u_gid; CHECK(dst_vm->GetGid(fid, 0, u_oid.GetInt64(), u_gid)); @@ -384,7 +386,7 @@ struct COOBuilder { continue; } auto v_oid = src_frag->GetId(v); - vineyard::property_graph_types::VID_TYPE v_gid; + vid_t v_gid; CHECK(dst_vm->GetGid(0, v_oid.GetInt64(), v_gid)); ARROW_OK_OR_RAISE(src_builder.Append(u_gid)); @@ -395,7 +397,7 @@ struct COOBuilder { auto& v = e.neighbor; if 
(src_frag->IsOuterVertex(v)) { auto v_oid = src_frag->GetId(v); - vineyard::property_graph_types::VID_TYPE v_gid; + vid_t v_gid; CHECK(dst_vm->GetGid(0, v_oid.GetInt64(), v_gid)); ARROW_OK_OR_RAISE(src_builder.Append(v_gid)); @@ -419,6 +421,7 @@ struct COOBuilder { template struct COOBuilder { using oid_t = int64_t; + using vid_t = typename DST_FRAG_T::vid_t; using src_fragment_t = DynamicFragment; using dst_fragment_t = DST_FRAG_T; @@ -438,7 +441,7 @@ struct COOBuilder { continue; } auto u_oid = src_frag->GetId(u); - vineyard::property_graph_types::VID_TYPE u_gid; + vid_t u_gid; CHECK(dst_vm->GetGid(fid, 0, u_oid.GetString(), u_gid)); @@ -448,7 +451,7 @@ struct COOBuilder { continue; } auto v_oid = src_frag->GetId(v); - vineyard::property_graph_types::VID_TYPE v_gid; + vid_t v_gid; CHECK(dst_vm->GetGid(0, v_oid.GetString(), v_gid)); ARROW_OK_OR_RAISE(src_builder.Append(u_gid)); @@ -459,7 +462,7 @@ struct COOBuilder { auto& v = e.neighbor; if (src_frag->IsOuterVertex(v)) { auto v_oid = src_frag->GetId(v); - vineyard::property_graph_types::VID_TYPE v_gid; + vid_t v_gid; CHECK(dst_vm->GetGid(0, v_oid.GetString(), v_gid)); ARROW_OK_OR_RAISE(src_builder.Append(v_gid)); @@ -606,14 +609,15 @@ class VertexMapConverter +template class DynamicToArrowConverter { using src_fragment_t = DynamicFragment; using oid_t = OID_T; using vid_t = typename src_fragment_t::vid_t; using vertex_map_t = VERTEX_MAP_T; using dst_fragment_t = - vineyard::ArrowFragment; + vineyard::ArrowFragment; using oid_array_t = typename vineyard::ConvertToArrowType::ArrayType; public: @@ -769,7 +773,7 @@ class DynamicToArrowConverter { std::vector> schema_vector = { std::make_shared("src", arrow::uint64()), std::make_shared("dst", arrow::uint64())}; - COOBuilder builder; + COOBuilder builder; BOOST_LEAF_AUTO(src_dst_array, builder.Build(src_frag, dst_vm)); std::shared_ptr src_array = src_dst_array.first, dst_array = src_dst_array.second; diff --git a/analytical_engine/frame/property_graph_frame.cc 
b/analytical_engine/frame/property_graph_frame.cc index 992f618e4202..6ca408c963df 100644 --- a/analytical_engine/frame/property_graph_frame.cc +++ b/analytical_engine/frame/property_graph_frame.cc @@ -213,9 +213,14 @@ ToArrowFragment(vineyard::Client& client, const grape::CommSpec& comm_spec, std::shared_ptr& wrapper_in, const std::string& dst_graph_name) { #ifdef NETWORKX - static_assert(std::is_same::value, - "The type of ArrowFragment::vid_t does not match with the " - "DynamicFragment::vid_t"); + if (!std::is_same::value) { + RETURN_GS_ERROR(vineyard::ErrorCode::kInvalidValueError, + "The type of vid_t '" + vineyard::type_name() + + "' does not match with the " + "DynamicFragment::vid_t '" + + vineyard::type_name() + + "'"); + } if (wrapper_in->graph_def().graph_type() != gs::rpc::graph::DYNAMIC_PROPERTY) { @@ -254,7 +259,7 @@ ToArrowFragment(vineyard::Client& client, const grape::CommSpec& comm_spec, std::string(vineyard::type_name())); } - gs::DynamicToArrowConverter converter( + gs::DynamicToArrowConverter converter( comm_spec, client); BOOST_LEAF_AUTO(arrow_frag, converter.Convert(dynamic_frag)); VINEYARD_CHECK_OK(client.Persist(arrow_frag->id())); diff --git a/analytical_engine/test/test_convert.cc b/analytical_engine/test/test_convert.cc index 46b1ae518a0e..aeb7b3469f9d 100644 --- a/analytical_engine/test/test_convert.cc +++ b/analytical_engine/test/test_convert.cc @@ -96,7 +96,7 @@ int main(int argc, char** argv) { BOOST_LEAF_AUTO(dynamic_frag, a2d_converter.Convert(arrow_frag)); LOG(INFO) << "ArrowFragment->DynamicFragment done."; - gs::DynamicToArrowConverter d2a_converter( + gs::DynamicToArrowConverter d2a_converter( comm_spec, client); BOOST_LEAF_AUTO(arrow_frag1, d2a_converter.Convert(dynamic_frag)); LOG(INFO) << "DynamicFragment->ArrowFragment done."; diff --git a/coordinator/gscoordinator/op_executor.py b/coordinator/gscoordinator/op_executor.py index a4ef6a3dcc61..890ba9597051 100644 --- a/coordinator/gscoordinator/op_executor.py +++ 
b/coordinator/gscoordinator/op_executor.py @@ -110,7 +110,7 @@ def _generate_runstep_request(session_id, dag_def, dag_bodies): # TODO: make the stacktrace separated from normal error messages # Too verbose. if len(e.details()) > 3072: # 3k bytes - msg = f"{e.details()[:256]} ... [truncated]" + msg = f"{e.details()[:1024]} ... [truncated]" else: msg = e.details() raise AnalyticalEngineInternalError(msg) diff --git a/coordinator/gscoordinator/template/CMakeLists.template b/coordinator/gscoordinator/template/CMakeLists.template index cfc44958c23a..ff8827e331d9 100644 --- a/coordinator/gscoordinator/template/CMakeLists.template +++ b/coordinator/gscoordinator/template/CMakeLists.template @@ -340,6 +340,7 @@ if (CYTHON_PREGEL_APP) add_library(${FRAME_NAME} SHARED ${FILES_NEED_COMPILE} ${ANALYTICAL_ENGINE_FRAME_DIR}/cython_app_frame.cc) target_compile_definitions(${FRAME_NAME} PRIVATE _OID_TYPE=$_oid_type + _VID_TYPE=$_vid_type _VD_TYPE=$_vd_type _MD_TYPE=$_md_type _MODULE_NAME=$_module_name @@ -353,6 +354,7 @@ elseif (CYTHON_PIE_APP) add_library(${FRAME_NAME} SHARED ${FILES_NEED_COMPILE} ${ANALYTICAL_ENGINE_FRAME_DIR}/cython_pie_app_frame.cc) target_compile_definitions(${FRAME_NAME} PRIVATE _OID_TYPE=$_oid_type + _VID_TYPE=$_vid_type _VD_TYPE=$_vd_type _MD_TYPE=$_md_type _MODULE_NAME=$_module_name diff --git a/coordinator/gscoordinator/utils.py b/coordinator/gscoordinator/utils.py index 49ded0164cb0..d0a8d44a228d 100644 --- a/coordinator/gscoordinator/utils.py +++ b/coordinator/gscoordinator/utils.py @@ -205,7 +205,7 @@ def get_app_sha256(attr, java_class_path: str): java_jar_path, java_app_class, ) = _codegen_app_info(attr, DEFAULT_GS_CONFIG_FILE, java_class_path) - graph_header, graph_type, _ = _codegen_graph_info(attr) + graph_header, graph_type, _, _ = _codegen_graph_info(attr) logger.info( "app type: %s (%s), graph type: %s (%s)", app_class, @@ -241,7 +241,7 @@ def get_app_sha256(attr, java_class_path: str): def get_graph_sha256(attr): - _, graph_class, _ = 
_codegen_graph_info(attr) + _, graph_class, _, _ = _codegen_graph_info(attr) return hashlib.sha256(graph_class.encode("utf-8", errors="ignore")).hexdigest() @@ -445,7 +445,7 @@ def compile_app( str(java_app_class), ) - graph_header, graph_type, graph_oid_type = _codegen_graph_info(attr) + graph_header, graph_type, graph_oid_type, graph_vid_type = _codegen_graph_info(attr) if app_type == "java_pie": logger.info( "Check consistent between java app %s and graph %s", @@ -556,6 +556,7 @@ def compile_app( _analytical_engine_home=ANALYTICAL_ENGINE_HOME, _frame_name=library_name, _oid_type=graph_oid_type, + _vid_type=graph_vid_type, _vd_type=vd_type, _md_type=md_type, _graph_type=graph_type, @@ -599,7 +600,7 @@ def compile_graph_frame( None: for consistency with compile_app. """ logger.info("Building graph library ...") - _, graph_class, _ = _codegen_graph_info(attr) + _, graph_class, _, _ = _codegen_graph_info(attr) library_dir = os.path.join(workspace, library_name) os.makedirs(library_dir, exist_ok=True) @@ -782,7 +783,11 @@ def _pre_process_for_bind_app_op(op, op_result_pool, key_to_op, **kwargs): ) ) op.attr[types_pb2.VID_TYPE].CopyFrom( - utils.s_to_attr(utils.data_type_to_cpp(vy_info.vid_type)) + utils.s_to_attr( + utils.normalize_data_type_str( + utils.data_type_to_cpp(vy_info.vid_type) + ) + ) ) op.attr[types_pb2.V_DATA_TYPE].CopyFrom( utils.s_to_attr(utils.data_type_to_cpp(vy_info.vdata_type)) @@ -1735,7 +1740,7 @@ def compact_edges(): raise ValueError( f"Unknown graph type: {graph_def_pb2.GraphTypePb.Name(graph_type)}" ) - return graph_header, graph_fqn, oid_type() + return graph_header, graph_fqn, oid_type(), vid_type() def create_single_op_dag(op_type, config=None): diff --git a/docs/analytical_engine/performance_tuning.md b/docs/analytical_engine/performance_tuning.md index 3e8c70c3ad65..4c878a997243 100644 --- a/docs/analytical_engine/performance_tuning.md +++ b/docs/analytical_engine/performance_tuning.md @@ -49,8 +49,8 @@ its property graphs. 
Basically, the `ArrowFragment` has the following members: first the neighbor vertex id and the second is the index points to the corresponding edge table. - By default, the type of `neighbor_vertex_id` is `uint64_t` and the type of - `edge_table_index` is `size_t`. + By default, the type of `neighbor_vertex_id` is `uint64_t` or `uint32_t` and + the type of `edge_table_index` is `size_t`. The size of the `indptr` array is `num_edges`. @@ -70,8 +70,8 @@ its property graphs. Basically, the `ArrowFragment` has the following members: first the neighbor vertex id and the second is the index points to the corresponding edge table. - By default, the type of `neighbor_vertex_id` is `uint64_t` and the type of - `edge_table_index` is `size_t`. + By default, the type of `neighbor_vertex_id` is `uint64_t` or `uint32_t` and + the type of `edge_table_index` is `size_t`. The size of the `indptr` array is `num_edges`. @@ -152,6 +152,15 @@ footprint as follows: - Optimizing topologies: + - GraphScope uses `uint64_t` as the default `VID_T` (internal vertex id) to support large-scale + graphs. However, from the above analysis, the type of `VID_T` is one of the key factors + that affects the memory footprint of the topology part. + + If you are sure your graph is fairly small (fewer than `10^8` vertices; the absolute + value depends on the number of labels and number of partitions), you can use `uint32_t` + as the `VID_T` to optimize the memory usage, via the `vid_type="uint32_t"` option in + `graphscope.g()` and `graphscope.load_from()`. + - GraphScope supports options `compact_edges=True` in `graphscope.g()` and `graphscope.load_from()` to compact the `ie_lists` and `oe_lists` arrays using delta and varint encoding.
Such compression can half the memory footprint of the topology part, but has overhead in computation during diff --git a/python/graphscope/client/session.py b/python/graphscope/client/session.py index 7ff3d0fc9b9e..68aec6dd0c3b 100755 --- a/python/graphscope/client/session.py +++ b/python/graphscope/client/session.py @@ -1251,6 +1251,7 @@ def g( self, incoming_data=None, oid_type="int64", + vid_type="uint64", directed=True, generate_eid=True, retain_oid=True, @@ -1258,6 +1259,26 @@ def g( compact_edges=False, use_perfect_hash=False, ) -> Union[Graph, GraphDAGNode]: + """Construct a GraphScope graph object on the default session. + + It will launch and set a session to default when there is no default session found. + + See params detail in :class:`graphscope.framework.graph.GraphDAGNode` + + Returns: + :class:`graphscope.framework.graph.GraphDAGNode`: Evaluated in eager mode. + + Examples: + + .. code:: python + + >>> import graphscope + >>> g = graphscope.g() + + >>> import graphscope + >>> sess = graphscope.session() + >>> g = sess.g() # creating graph on the session "sess" + """ if ( isinstance(incoming_data, vineyard.ObjectID) and repr(incoming_data) in self._vineyard_object_mapping_table @@ -1270,6 +1291,7 @@ def g( self, incoming_data, oid_type, + vid_type, directed, generate_eid, retain_oid, @@ -1698,6 +1720,7 @@ def get_controller(self, default): def g( incoming_data=None, oid_type="int64", + vid_type="uint64", directed=True, generate_eid=True, retain_oid=True, @@ -1729,6 +1752,7 @@ def g( return get_default_session().g( incoming_data, oid_type, + vid_type, directed, generate_eid, retain_oid, diff --git a/python/graphscope/framework/dag_utils.py b/python/graphscope/framework/dag_utils.py index b27e96a80f24..f8fe467a516e 100644 --- a/python/graphscope/framework/dag_utils.py +++ b/python/graphscope/framework/dag_utils.py @@ -203,12 +203,12 @@ def add_labels_to_graph(graph, loader_op): types_pb2.GRAPH_TYPE: utils.graph_type_to_attr(graph._graph_type), 
types_pb2.DIRECTED: utils.b_to_attr(graph._directed), types_pb2.OID_TYPE: utils.s_to_attr(graph._oid_type), + types_pb2.VID_TYPE: utils.s_to_attr(graph._vid_type), types_pb2.GENERATE_EID: utils.b_to_attr(graph._generate_eid), types_pb2.RETAIN_OID: utils.b_to_attr(graph._retain_oid), types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(graph._vertex_map), types_pb2.COMPACT_EDGES: utils.b_to_attr(graph._compact_edges), types_pb2.USE_PERFECT_HASH: utils.b_to_attr(graph._use_perfect_hash), - types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"), types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(False), types_pb2.IS_FROM_GAR: utils.b_to_attr(False), } @@ -250,12 +250,12 @@ def consolidate_columns( types_pb2.GRAPH_TYPE: utils.graph_type_to_attr(graph._graph_type), types_pb2.DIRECTED: utils.b_to_attr(graph._directed), types_pb2.OID_TYPE: utils.s_to_attr(graph._oid_type), + types_pb2.VID_TYPE: utils.s_to_attr(graph._vid_type), types_pb2.GENERATE_EID: utils.b_to_attr(graph._generate_eid), types_pb2.RETAIN_OID: utils.b_to_attr(graph._retain_oid), types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(graph._vertex_map), types_pb2.COMPACT_EDGES: utils.b_to_attr(graph._compact_edges), types_pb2.USE_PERFECT_HASH: utils.b_to_attr(graph._use_perfect_hash), - types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"), types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(False), types_pb2.IS_FROM_GAR: utils.b_to_attr(False), types_pb2.CONSOLIDATE_COLUMNS_LABEL: utils.s_to_attr(label), @@ -1094,7 +1094,7 @@ def archive_graph(graph, path): config = { types_pb2.GRAPH_TYPE: utils.graph_type_to_attr(graph._graph_type), types_pb2.OID_TYPE: utils.s_to_attr(graph._oid_type), - types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"), + types_pb2.VID_TYPE: utils.s_to_attr(graph._vid_type), types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(graph._vertex_map), types_pb2.COMPACT_EDGES: utils.b_to_attr(graph._compact_edges), types_pb2.USE_PERFECT_HASH: utils.b_to_attr(graph._use_perfect_hash), diff --git a/python/graphscope/framework/graph.py 
b/python/graphscope/framework/graph.py index 911d3051f8e6..281ca3ddf54d 100644 --- a/python/graphscope/framework/graph.py +++ b/python/graphscope/framework/graph.py @@ -61,6 +61,7 @@ def __init__(self): self._generate_eid = True self._retain_oid = True self._oid_type = "int64" + self._vid_type = "uint64" self._vertex_map = graph_def_pb2.GLOBAL_VERTEX_MAP self._compact_edges = False self._use_perfect_hash = False @@ -208,7 +209,7 @@ def _construct_op_of_empty_graph(self): config[types_pb2.GENERATE_EID] = utils.b_to_attr(self._generate_eid) config[types_pb2.RETAIN_OID] = utils.b_to_attr(self._retain_oid) config[types_pb2.OID_TYPE] = utils.s_to_attr(self._oid_type) - config[types_pb2.VID_TYPE] = utils.s_to_attr("uint64_t") + config[types_pb2.VID_TYPE] = utils.s_to_attr(self._vid_type) config[types_pb2.IS_FROM_VINEYARD_ID] = utils.b_to_attr(False) config[types_pb2.IS_FROM_GAR] = utils.b_to_attr(False) config[types_pb2.VERTEX_MAP_TYPE] = utils.i_to_attr(self._vertex_map) @@ -253,6 +254,7 @@ def __init__( session, incoming_data=None, oid_type="int64", + vid_type="uint64", directed=True, generate_eid=True, retain_oid=True, @@ -273,6 +275,7 @@ def __init__( - :class:`vineyard.Object`, :class:`vineyard.ObjectId` or :class:`vineyard.ObjectName` oid_type: (str, optional): Type of vertex original id. Defaults to "int64". + vid_type: (str, optional): Type of vertex internal id. Defaults to "uint64". directed: (bool, optional): Directed graph or not. Defaults to True. generate_eid: (bool, optional): Generate id for each edge when set True. Defaults to True. retain_oid: (bool, optional): Keep original ID in vertex table when set True. Defaults to True. 
@@ -289,7 +292,11 @@ def __init__( oid_type = utils.normalize_data_type_str(oid_type) if oid_type not in ("int32_t", "int64_t", "std::string"): raise ValueError("oid_type can only be int32_t, int64_t or string.") + vid_type = utils.normalize_data_type_str(vid_type) + if vid_type not in ("uint32_t", "uint64_t"): + raise ValueError("vid_type can only be uint32_t or uint64_t.") self._oid_type = oid_type + self._vid_type = vid_type self._directed = directed self._generate_eid = generate_eid self._retain_oid = retain_oid @@ -353,6 +360,10 @@ def graph_type(self): def oid_type(self): return utils.normalize_data_type_str(self._oid_type) + @property + def vid_type(self): + return utils.normalize_data_type_str(self._vid_type) + def _project_to_simple(self, v_prop=None, e_prop=None): check_argument(self.graph_type == graph_def_pb2.ARROW_PROPERTY) op = dag_utils.project_to_simple(self, str(v_prop), str(e_prop)) @@ -361,6 +372,7 @@ def _project_to_simple(self, v_prop=None, e_prop=None): self._session, op, self._oid_type, + self._vid_type, self._directed, self._generate_eid, self._retain_oid, @@ -519,6 +531,7 @@ def add_vertices( self._session, op, self._oid_type, + self._vid_type, self._directed, self._generate_eid, self._retain_oid, @@ -681,6 +694,7 @@ def add_edges( self._session, op, self._oid_type, + self._vid_type, self._directed, self._generate_eid, self._retain_oid, @@ -732,6 +746,7 @@ def consolidate_columns( self._session, op, self._oid_type, + self._vid_type, self._directed, self._generate_eid, self._retain_oid, @@ -838,6 +853,7 @@ def project( self._session, op, self._oid_type, + self._vid_type, self._directed, self._generate_eid, self._retain_oid, @@ -918,6 +934,7 @@ def update_from_graph_def(self, graph_def): self._vineyard_id = vy_info.vineyard_id self._fragments = list(vy_info.fragments) self._oid_type = data_type_to_cpp(vy_info.oid_type) + self._vid_type = data_type_to_cpp(vy_info.vid_type) self._generate_eid = vy_info.generate_eid self._retain_oid = 
vy_info.retain_oid @@ -973,11 +990,15 @@ def op(self): def oid_type(self): return self._graph_node.oid_type + @property + def vid_type(self): + return self._graph_node.vid_type + @property def template_str(self): # transform str/string to std::string oid_type = utils.normalize_data_type_str(self._oid_type) - vid_type = utils.data_type_to_cpp(self._schema._vid_type) + vid_type = utils.normalize_data_type_str(self._vid_type) vdata_type = utils.data_type_to_cpp(self._schema.vdata_type) edata_type = utils.data_type_to_cpp(self._schema.edata_type) vertex_map_type = utils.vertex_map_type_to_cpp(self._vertex_map) diff --git a/python/graphscope/framework/graph_builder.py b/python/graphscope/framework/graph_builder.py index 983594c195af..fc1a6e4cb07b 100644 --- a/python/graphscope/framework/graph_builder.py +++ b/python/graphscope/framework/graph_builder.py @@ -52,6 +52,7 @@ def load_from( ] = None, directed=True, oid_type="int64_t", + vid_type="uint64_t", generate_eid=True, retain_oid=True, vformat=None, @@ -153,6 +154,8 @@ def load_from( directed (bool, optional): Indicate whether the graph should be treated as directed or undirected. oid_type (str, optional): ID type of graph. Can be "int32_t", "int64_t" or "string". Defaults to "int64_t". + vid_type (str, optional): Internal vertex ID type of graph. Can be "uint32_t" and "uint64_t". + Defaults to "uint64_t". generate_eid (bool, optional): Whether to generate a unique edge id for each edge. Generated eid will be placed in third column. This feature is for cooperating with interactive engine. If you only need to work with analytical engine, set it to False. Defaults to True. 
@@ -177,6 +180,9 @@ def load_from( oid_type = utils.normalize_data_type_str(oid_type) if oid_type not in ("int32_t", "int64_t", "std::string"): raise ValueError("oid_type can only be int32_t, int64_t or string.") + vid_type = utils.normalize_data_type_str(vid_type) + if vid_type not in ("uint32_t", "uint64_t"): + raise ValueError("vid_type can only be uint32_t or uint64_t.") v_labels = normalize_parameter_vertices(vertices, oid_type, vformat) e_labels = normalize_parameter_edges(edges, oid_type, eformat) # generate and add a loader op to dag @@ -187,9 +193,9 @@ def load_from( config = { types_pb2.DIRECTED: utils.b_to_attr(directed), types_pb2.OID_TYPE: utils.s_to_attr(oid_type), + types_pb2.VID_TYPE: utils.s_to_attr(vid_type), types_pb2.GENERATE_EID: utils.b_to_attr(generate_eid), types_pb2.RETAIN_OID: utils.b_to_attr(retain_oid), - types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"), types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(False), types_pb2.IS_FROM_GAR: utils.b_to_attr(False), types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(vertex_map), @@ -202,6 +208,7 @@ def load_from( graph = sess.g( op, oid_type=oid_type, + vid_type=vid_type, directed=directed, generate_eid=generate_eid, retain_oid=retain_oid, @@ -216,6 +223,7 @@ def load_from_gar( graph_info_path: str, directed=True, oid_type="int64_t", + vid_type="uint64_t", vertex_map="global", compact_edges=False, use_perfect_hash=False, @@ -224,6 +232,9 @@ def load_from_gar( oid_type = utils.normalize_data_type_str(oid_type) if oid_type not in ("int32_t", "int64_t", "std::string"): raise ValueError("The 'oid_type' can only be int32_t, int64_t or string.") + vid_type = utils.normalize_data_type_str(vid_type) + if vid_type not in ("uint32_t", "uint64_t"): + raise ValueError("The 'vid_type' can only be uint32_t or uint64_t.") if compact_edges: raise ValueError( "Loading from gar with 'compact_edges' hasn't been supported yet." 
@@ -238,9 +249,9 @@ def load_from_gar( config = { types_pb2.DIRECTED: utils.b_to_attr(directed), types_pb2.OID_TYPE: utils.s_to_attr(oid_type), + types_pb2.VID_TYPE: utils.s_to_attr(vid_type), types_pb2.GENERATE_EID: utils.b_to_attr(False), types_pb2.RETAIN_OID: utils.b_to_attr(False), - types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"), types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(False), types_pb2.IS_FROM_GAR: utils.b_to_attr(True), types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(vertex_map), @@ -254,6 +265,7 @@ def load_from_gar( graph = sess.g( op, oid_type=oid_type, + vid_type=vid_type, directed=directed, vertex_map=vertex_map, compact_edges=compact_edges, diff --git a/python/graphscope/nx/classes/graph.py b/python/graphscope/nx/classes/graph.py index 4f1cda4252d7..0d186ba6172e 100644 --- a/python/graphscope/nx/classes/graph.py +++ b/python/graphscope/nx/classes/graph.py @@ -467,13 +467,17 @@ def template_str(self): oid_type = utils.normalize_data_type_str( utils.data_type_to_cpp(self._schema.oid_type) ) - vid_type = self._schema.vid_type + vid_type = utils.normalize_data_type_str( + utils.data_type_to_cpp(self._schema.vid_type) + ) s = f"vineyard::ArrowFragment<{oid_type},{vid_type}>" elif self._graph_type == graph_def_pb2.ARROW_FLATTENED: oid_type = utils.normalize_data_type_str( utils.data_type_to_cpp(self._schema.oid_type) ) - vid_type = self._schema.vid_type + vid_type = utils.normalize_data_type_str( + utils.data_type_to_cpp(self._schema.vid_type) + ) vdata_type = utils.data_type_to_cpp(self._schema.vdata_type) edata_type = utils.data_type_to_cpp(self._schema.edata_type) s = f"gs::ArrowFlattenedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>" @@ -481,7 +485,9 @@ def template_str(self): oid_type = utils.normalize_data_type_str( utils.data_type_to_cpp(self._schema.oid_type) ) - vid_type = self._schema.vid_type + vid_type = utils.normalize_data_type_str( + utils.data_type_to_cpp(self._schema.vid_type) + ) vdata_type =
utils.data_type_to_cpp(self._schema.vdata_type) edata_type = utils.data_type_to_cpp(self._schema.edata_type) s = f"gs::ArrowProjectedFragment<{oid_type},{vid_type},{vdata_type},{edata_type}>" diff --git a/python/graphscope/tests/conftest.py b/python/graphscope/tests/conftest.py index 3439609c0218..bd0e022ba044 100644 --- a/python/graphscope/tests/conftest.py +++ b/python/graphscope/tests/conftest.py @@ -462,6 +462,22 @@ def p2p_property_graph_int32(graphscope_session): del g +@pytest.fixture(scope="module") +def p2p_property_graph_uint32_vid(graphscope_session): + g = graphscope_session.g( + vid_type="uint32", generate_eid=False, retain_oid=True, directed=True + ) + g = g.add_vertices(f"{property_dir}/p2p-31_property_v_0", "person") + g = g.add_edges( + f"{property_dir}/p2p-31_property_e_0", + label="knows", + src_label="person", + dst_label="person", + ) + yield g + del g + + @pytest.fixture(scope="module") def p2p_property_graph_undirected(graphscope_session): g = graphscope_session.g(directed=False, generate_eid=False, retain_oid=False) diff --git a/python/graphscope/tests/unittest/test_graph.py b/python/graphscope/tests/unittest/test_graph.py index 57c9c5d3fbf7..82b9dba64b0b 100644 --- a/python/graphscope/tests/unittest/test_graph.py +++ b/python/graphscope/tests/unittest/test_graph.py @@ -659,6 +659,18 @@ def test_add_column_int32_oid( assert "pagerank" in property_names +def test_uint32_vid_graph(p2p_property_graph_uint32_vid): + g = p2p_property_graph_uint32_vid + assert g.oid_type == "int64_t" + assert g.vid_type == "uint32_t" + + +def test_uint64_vid_graph(p2p_property_graph): + g = p2p_property_graph + assert g.oid_type == "int64_t" + assert g.vid_type == "uint64_t" + + def test_graph_lifecycle(graphscope_session): graph = load_p2p_network(graphscope_session) c = graphscope.wcc(graph)