Skip to content

Commit b9e4a5b

Browse files
zheng-da and VoVAllen
authored
[Test] add regression tests for graph partitioning. (dmlc#1561)
* add tests. * 111 * fix * Update asv.conf.json * fix. * benchmark partition with livejournal. * fix benchmark * fix. * fix. * remove ogb * Revert "Update asv.conf.json" This reverts commit dd327a5. * change branch * depend pandas * Revert "change branch" This reverts commit 1d4f937. * Update README.md Co-authored-by: VoVAllen <[email protected]> Co-authored-by: Jinjing Zhou <[email protected]>
1 parent d2e1cfc commit b9e4a5b

File tree

8 files changed

+109
-8
lines changed

8 files changed

+109
-8
lines changed

asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,4 @@
142142
// "some_benchmark": 0.01, // Threshold of 1%
143143
// "another_benchmark": 0.5, // Threshold of 50%
144144
// },
145-
}
145+
}

tests/regression/README.md

+1-6
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,7 @@ The basic use is execute a script, and get the needed results out of the printed
2121
The default regression branch in asv is `master`. If you need to run on other branch on your fork, please change the `branches` value in the `asv.conf.json` at the root of your repo.
2222

2323
```bash
24-
docker run --name dgl-reg --rm --hostname=reg-machine --runtime=nvidia -dit dgllib/dgl-ci-gpu:conda /bin/bash
25-
docker cp ./asv_data dgl-reg:/root/asv_data/
26-
docker cp ./run.sh dgl-reg:/root/run.sh <repo> <branch>
27-
docker exec dgl-reg bash /root/asv_data/run.sh
28-
docker cp dgl-reg:/root/regression/dgl/asv/. ./asv_data/ # Change /home/ubuntu/asv to the path you want to put the result
29-
docker stop dgl-reg
24+
bash ./publish.sh <repo> <branch>
3025
```
3126

3227
The running result will be at `./asv_data/`. You can use `python -m http.server` inside the `html` folder to start a server to see the result

tests/regression/bench_partition.py

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Write the benchmarking functions here.
2+
# See "Writing benchmarks" in the asv docs for more information.
3+
4+
import subprocess
5+
import os
6+
from pathlib import Path
7+
import numpy as np
8+
import tempfile
9+
10+
base_path = Path("~/regression/dgl/")
11+
12+
class PartitionBenchmark:
    """ASV benchmark: time DGL graph partitioning.

    Runs the partition script in a backend-specific conda environment as a
    subprocess and parses the 'Time: <x> seconds' lines it prints.
    """

    # asv parameter grid: one list per name in param_names.
    params = [['pytorch'], ['livejournal']]
    param_names = ['backend', 'dataset']
    timeout = 600  # seconds before asv aborts a run

    def __init__(self):
        # Cache of captured stdout keyed by "<backend>_<dataset>" so the
        # expensive subprocess runs at most once per parameter combination.
        self.std_log = {}

    def setup(self, backend, dataset):
        """Run the partition script once per (backend, dataset) and cache stdout."""
        key_name = "{}_{}".format(backend, dataset)
        if key_name in self.std_log:
            return
        bench_path = base_path / "tests/regression/benchmarks/partition.py"
        command = "/opt/conda/envs/{}-ci/bin/python {} --dataset {}".format(
            backend, bench_path.expanduser(), dataset)
        # text=True decodes stdout/stderr to str, so the parser below can
        # split on real newlines instead of the repr() of a bytes object.
        result = subprocess.run(
            command.split(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=dict(os.environ, DGLBACKEND=backend),
        )
        print(result.stderr)
        self.std_log[key_name] = result.stdout

    def track_partition_time(self, backend, dataset):
        """Return the mean of all reported partition times, in seconds."""
        key_name = "{}_{}".format(backend, dataset)
        lines = self.std_log[key_name].split("\n")

        time_list = []
        for line in lines:
            if 'Time:' in line:
                # Line format: 'Time: <seconds> seconds'
                time_str = line.strip().split(' ')[1]
                time_list.append(float(time_str))
        if not time_list:
            # The script produced no timing lines (e.g. it crashed): report
            # NaN explicitly rather than hitting numpy's mean-of-empty-slice
            # RuntimeWarning.
            return float('nan')
        return np.array(time_list).mean()


PartitionBenchmark.track_partition_time.unit = 's'
49+
+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import dgl
2+
from dgl import distributed as dgl_distributed
3+
import argparse, time
4+
from utils import get_graph
5+
6+
parser = argparse.ArgumentParser(description='partition')
7+
parser.add_argument("--dataset", type=str, default='livejournal',
8+
help="specify the graph for partitioning")
9+
parser.add_argument("--num_parts", type=int, default=16,
10+
help="the number of partitions")
11+
args = parser.parse_args()
12+
13+
g = get_graph(args.dataset)
14+
print('{}: |V|={}, |E|={}'.format(args.dataset, g.number_of_nodes(), g.number_of_edges()))
15+
start = time.time()
16+
dgl_distributed.partition_graph(g, args.dataset, args.num_parts, '/tmp', num_hops=1, part_method="metis")
17+
print('Time: {} seconds'.format(time.time() - start))

tests/regression/benchmarks/utils.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import os
2+
import shutil, zipfile
3+
import requests
4+
import numpy as np
5+
import pandas
6+
import dgl
7+
8+
def _download(url, path, filename):
9+
fn = os.path.join(path, filename)
10+
if os.path.exists(fn):
11+
return
12+
13+
os.makedirs(path, exist_ok=True)
14+
f_remote = requests.get(url, stream=True)
15+
sz = f_remote.headers.get('content-length')
16+
assert f_remote.status_code == 200, 'fail to open {}'.format(url)
17+
with open(fn, 'wb') as writer:
18+
for chunk in f_remote.iter_content(chunk_size=1024*1024):
19+
writer.write(chunk)
20+
print('Download finished.')
21+
22+
def get_livejournal():
    """Fetch the SNAP soc-LiveJournal1 edge list and build a read-only DGLGraph."""
    _download('https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz',
              '/tmp', 'soc-LiveJournal1.txt.gz')
    # The SNAP dump has a 4-line comment header, then tab-separated src/dst
    # node-id pairs.
    edges = pandas.read_csv('/tmp/soc-LiveJournal1.txt.gz', sep='\t', skiprows=4,
                            header=None, names=['src', 'dst'], compression='gzip')
    print('construct the graph')
    return dgl.DGLGraph((np.array(edges['src']), np.array(edges['dst'])),
                        readonly=True)
31+
32+
def get_graph(name):
    """Return the benchmark graph called *name*, or None if it is unknown."""
    if name != 'livejournal':
        # Unknown dataset: report it and signal failure with None.
        print(name + " doesn't exist")
        return None
    return get_livejournal()

tests/regression/install_dgl_asv.sh

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pip uninstall -y dgl
1616
python3 setup.py install
1717
# test inplace build (for cython)
1818
python3 setup.py build_ext --inplace
19+
python3 -m pip install -r /root/requirement.txt
1920
done
2021
popd
2122
conda deactivate

tests/regression/publish.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@ else
1010
BRANCH=$2
1111
fi
1212

13-
docker run --name dgl-reg --rm --hostname=reg-machine --runtime=nvidia -dit dgllib/dgl-ci-gpu:conda /bin/bash
13+
docker run --name dgl-reg --rm --runtime=nvidia --hostname=reg-machine -dit dgllib/dgl-ci-gpu:conda /bin/bash
1414
docker cp ./asv_data dgl-reg:/root/asv_data/
1515
docker cp ./run.sh dgl-reg:/root/run.sh
16+
docker cp ./requirement.txt dgl-reg:/root/requirement.txt
1617
docker exec dgl-reg bash /root/run.sh $REPO $BRANCH
1718
docker cp dgl-reg:/root/regression/dgl/asv/. ./asv_data/
1819
docker stop dgl-reg

tests/regression/requirement.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pandas

0 commit comments

Comments
 (0)