Skip to content

Commit 45a4dd3

Browse files
committed
impl new schema reading
1 parent 8608dc0 commit 45a4dd3

21 files changed

+774
-339
lines changed

.github/workflows/flex.yml

+9
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,12 @@ jobs:
5656
cmake .. && sudo make -j$(nproc)
5757
export FLEX_DATA_DIR=../../../../storages/rt_mutable_graph/modern_graph/
5858
./run_grin_test
59+
60+
- name: Test Graph Loading
61+
env:
62+
FLEX_DATA_DIR: ${GITHUB_WORKSPACE}/flex/build/
63+
run: |
64+
cd ${GITHUB_WORKSPACE}/flex/storages/rt_mutable_graph/modern_graph/
65+
GLOG_v=10 ./tests/rt_mutable_graph/test_graph_loading \
66+
../storages/rt_mutable_graph/modern_graph/modern_graph_new.yaml \
67+
../storages/rt_mutable_graph/modern_graph/bulk_load_new.yaml /tmp/csr-data-dir/

flex/bin/rt_server.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ int main(int argc, char** argv) {
8484

8585
auto ret = gs::Schema::LoadFromYaml(graph_schema_path, bulk_load_config_path);
8686
db.Init(std::get<0>(ret), std::get<1>(ret), std::get<2>(ret),
87-
std::get<3>(ret), data_path, shard_num);
87+
std::get<3>(ret), std::get<4>(ret), data_path, shard_num);
8888

8989
t0 += grape::GetCurrentTime();
9090

flex/bin/sync_server.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ int main(int argc, char** argv) {
207207

208208
auto ret = gs::Schema::LoadFromYaml(graph_schema_path, bulk_load_config_path);
209209
db.Init(std::get<0>(ret), std::get<1>(ret), std::get<2>(ret),
210-
std::get<3>(ret), data_path, shard_num);
210+
std::get<3>(ret), std::get<4>(ret), data_path, shard_num);
211211

212212
t0 += grape::GetCurrentTime();
213213

flex/engines/graph_db/database/graph_db.cc

+4-4
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ GraphDB& GraphDB::get() {
5151
void GraphDB::Init(
5252
const Schema& schema,
5353
const std::vector<std::pair<std::string, std::string>>& vertex_files,
54-
const std::vector<std::tuple<std::string, std::string, std::string,
55-
std::string>>& edge_files,
56-
const std::vector<std::string>& plugins, const std::string& data_dir,
57-
int thread_num) {
54+
const std::vector<std::tuple<std::string, std::string, std::string, int32_t,
55+
int32_t, std::string>>& edge_files,
56+
const std::vector<std::string>& plugins, const LoadConfig& load_config,
57+
const std::string& data_dir, int thread_num) {
5858
std::filesystem::path data_dir_path(data_dir);
5959
if (!std::filesystem::exists(data_dir_path)) {
6060
std::filesystem::create_directory(data_dir_path);

flex/engines/graph_db/database/graph_db.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ class GraphDB {
4949
const Schema& schema,
5050
const std::vector<std::pair<std::string, std::string>>& vertex_files,
5151
const std::vector<std::tuple<std::string, std::string, std::string,
52-
std::string>>& edge_files,
53-
const std::vector<std::string>& plugins, const std::string& data_dir,
54-
int thread_num = 1);
52+
int32_t, int32_t, std::string>>& edge_files,
53+
const std::vector<std::string>& plugins, const LoadConfig& config,
54+
const std::string& data_dir, int thread_num = 1);
5555

5656
/** @brief Create a transaction to read vertices and edges.
5757
*

flex/storages/rt_mutable_graph/README.md

+72-48
Original file line numberDiff line numberDiff line change
@@ -25,64 +25,88 @@ The configuration file ([modern graph example](./modern_graph/modern_graph.yaml)
2525
Here is an example of a configuration file:
2626

2727
```yaml
28-
graph:
29-
graph_store: mutable_csr
30-
vertex:
31-
- label_name: person
28+
name: modern
29+
store_type: mutable_csr
30+
stored_procedures:
31+
directory: plugins
32+
enable_lists:
33+
- libxxx.so
34+
schema:
35+
vertex_types:
36+
- type_name: person
37+
x_csr_params:
38+
max_vertex_num: 100
3239
properties:
33-
- name: _ID
34-
type: int64
35-
- name: name
36-
type: String
37-
- name: age
38-
type: int32
39-
max_vertex_num: 100
40-
- label_name: software
40+
- property_id: 0
41+
property_name: id
42+
property_type:
43+
primitive_type: DT_SIGNED_INT64
44+
- property_id: 1
45+
property_name: name
46+
property_type:
47+
primitive_type: DT_STRING
48+
- property_id: 2
49+
property_name: age
50+
property_type:
51+
primitive_type: DT_SIGNED_INT32
52+
primary_keys:
53+
- id
54+
- type_name: software
55+
x_csr_params:
56+
max_vertex_num: 100
4157
properties:
42-
- name: _ID
43-
type: int64
44-
- name: name
45-
type: String
46-
- name: lang
47-
type: String
48-
max_vertex_num: 100
49-
edge:
50-
- src_label_name: person
51-
dst_label_name: software
52-
edge_label_name: created
58+
- property_id: 0
59+
property_name: id
60+
property_type:
61+
primitive_type: DT_SIGNED_INT64
62+
x_csr_params:
63+
- property_id: 1
64+
property_name: name
65+
property_type:
66+
primitive_type: DT_STRING
67+
- property_id: 2
68+
property_name: lang
69+
property_type:
70+
primitive_type: DT_STRING
71+
primary_keys:
72+
- id
73+
edge_types:
74+
- type_name: knows
75+
x_csr_params:
76+
incoming_edge_strategy: None
77+
outgoing_edge_strategy: Multiple
78+
vertex_type_pair_relations:
79+
source_vertex: person
80+
destination_vertex: person
81+
relation: MANY_TO_MANY
5382
properties:
54-
- name: _SRC
55-
type: int64
56-
- name: _DST
57-
type: int64
58-
- name: weight
59-
type: double
60-
incoming_edge_strategy: None
61-
outgoing_edge_strategy: Single
62-
- src_label_name: person
63-
dst_label_name: person
64-
edge_label_name: knows
83+
- property_id: 0
84+
property_name: weight
85+
property_type:
86+
primitive_type: DT_DOUBLE
87+
- type_name: created
88+
x_csr_params:
89+
incoming_edge_strategy: None
90+
outgoing_edge_strategy: Single
91+
vertex_type_pair_relations:
92+
source_vertex: person
93+
destination_vertex: software
94+
relation: ONE_TO_MANY
6595
properties:
66-
- name: _SRC
67-
type: int64
68-
- name: _DST
69-
type: int64
70-
- name: weight
71-
type: double
72-
incoming_edge_strategy: None
73-
outgoing_edge_strategy: Multiple
74-
75-
stored_procedures:
76-
- libxxx.so
96+
- property_id: 0
97+
property_name: weight
98+
property_type:
99+
primitive_type: DT_DOUBLE
77100
```
78101
79102
Notes:
80103
81-
- `_ID`, `_SRC`, `_DST` are reserved words, they are the external id of vertices, only int64 type is supported.
82-
- `max_vertex_num` limit the number of vertices of this type:
104+
- Currently we only support one primary key, and the type has to be `DT_SIGNED_INT64`.
105+
- All implementation-related configurations are placed under `x_csr_params`.
106+
- `max_vertex_num` limits the number of vertices of this type:
83107
- The limit is used to size the `mmap`ed memory, so it only consumes virtual memory until vertices are actually inserted.
84108
- If `max_vertex_num` is not set, a large default value (e.g., 2^48) will be used.
85-
- `incoming/outgoing_edge_strategy` specifies the storing strategy of the incoming or outgoing edges of this type, there are 3 kinds of strategies
109+
- `incoming/outgoing_edge_strategy` specifies the storage strategy for the incoming or outgoing edges of this type. There are three strategies:
86110
- None: no edge will be stored
87111
- Single: only one edge will be stored
88112
- Multiple(default): multiple edges will be stored
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/** Copyright 2020 Alibaba Group Holding Limited.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
#ifndef STORAGE_RT_MUTABLE_GRAPH_LOAD_CONFIG_H_
17+
#define STORAGE_RT_MUTABLE_GRAPH_LOAD_CONFIG_H_
18+
19+
#include <string>
20+
21+
namespace gs {
22+
// Provide meta info about bulk loading.
23+
struct LoadConfig {
24+
std::string data_source_; // "file", "hdfs", "oss", "s3"
25+
std::string delimiter_; // "\t", ",", " ", "|"
26+
std::string method_; // init, append, overwrite
27+
};
28+
} // namespace gs
29+
30+
#endif // STORAGE_RT_MUTABLE_GRAPH_LOAD_CONFIG_H_

0 commit comments

Comments
 (0)