[Example] GCMC with sampling #1296

Merged (27 commits, Apr 13, 2020)
196 changes: 186 additions & 10 deletions examples/pytorch/gcmc/README.md
@@ -11,41 +11,217 @@ Credit: Jiani Zhang ([@jennyzhang0215](https://github.com/jennyzhang0215))
## Dependencies
* PyTorch 1.2+
* pandas
-* torchtext 0.4+
+* torchtext 0.4+ (if using user and item contents as node features)
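
A possible environment setup is sketched below; it assumes DGL and a recent PyTorch are already installed, and the spaCy pieces are only needed for runs that use user/item features (the movie-title embedding in `data.py` tokenizes with `tokenize='spacy'`):

```bash
# Sketch only; exact package versions depend on your environment.
pip install pandas
pip install "torchtext>=0.4"   # only needed for runs that use node features
pip install spacy              # torchtext tokenizes movie titles with spaCy
python -m spacy download en    # English tokenizer model (name may differ in newer spaCy versions)
```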

## Data

Supported datasets: ml-100k, ml-1m, ml-10m
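
All three datasets are handled by the `MovieLens` class in `data.py`, which downloads and extracts the archive on first use and builds the train/validation/test rating graphs. A minimal loading sketch (run from this example's directory; the constructor arguments and attributes mirror the `data.py` excerpts shown later in this page):

```python
import torch as th
from data import MovieLens

# Load MovieLens-100k with one-hot (identity) node features on CPU.
dataset = MovieLens("ml-100k", device=th.device("cpu"), use_one_hot_fea=True)

print(dataset.possible_rating_values)  # the discrete rating levels of the dataset
print(dataset.user_feature_shape)      # (num_user, num_user) when one-hot features are used
```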

## How to run

### Train with full-graph
ml-100k, no feature
```bash
-python train.py --data_name=ml-100k --use_one_hot_fea --gcn_agg_accum=stack
+python3 train.py --data_name=ml-100k --use_one_hot_fea --gcn_agg_accum=stack
```
Results: RMSE=0.9088 (0.910 reported)
-Speed: 0.0195s/epoch (vanilla implementation: 0.1008s/epoch)
+Speed: 0.0410s/epoch (vanilla implementation: 0.1008s/epoch)

ml-100k, with feature
```bash
-python train.py --data_name=ml-100k --gcn_agg_accum=stack
+python3 train.py --data_name=ml-100k --gcn_agg_accum=stack
```
Results: RMSE=0.9448 (0.905 reported)

ml-1m, no feature
```bash
-python train.py --data_name=ml-1m --gcn_agg_accum=sum --use_one_hot_fea
+python3 train.py --data_name=ml-1m --gcn_agg_accum=sum --use_one_hot_fea
```
Results: RMSE=0.8377 (0.832 reported)
-Speed: 0.0557s/epoch (vanilla implementation: 1.538s/epoch)
+Speed: 0.0844s/epoch (vanilla implementation: 1.538s/epoch)

ml-10m, no feature
```bash
-python train.py --data_name=ml-10m --gcn_agg_accum=stack --gcn_dropout=0.3 \
+python3 train.py --data_name=ml-10m --gcn_agg_accum=stack --gcn_dropout=0.3 \
--train_lr=0.001 --train_min_lr=0.0001 --train_max_iter=15000 \
--use_one_hot_fea --gen_r_num_basis_func=4
```
Results: RMSE=0.7800 (0.777 reported)
-Speed: 0.9207s/epoch (vanilla implementation: OOM)
+Speed: 1.1982s/epoch (vanilla implementation: OOM)
Testbed: EC2 p3.2xlarge instance (Amazon Linux 2)
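
The reported results are test-set RMSE between predicted and true ratings; for reference, the metric is simply the root of the mean squared error:

```python
import torch as th

def rmse(pred, truth):
    """Root-mean-square error between predicted and true ratings."""
    return th.sqrt(((pred - truth) ** 2).mean()).item()

# Example: a perfect prediction gives RMSE 0.
assert rmse(th.tensor([4.0, 3.0]), th.tensor([4.0, 3.0])) == 0.0
```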

### Train with minibatch on a single GPU
ml-100k, no feature
```bash
python3 train_sampling.py --data_name=ml-100k \
--use_one_hot_fea \
--gcn_agg_accum=stack \
--gpu 0

```
ml-100k, no feature, with mix_cpu_gpu run. In a mix_cpu_gpu run with no feature, the per-rating weight W_r is kept in CPU memory by default instead of on the GPU (see the sketch after the results below).
```bash
python3 train_sampling.py --data_name=ml-100k \
--use_one_hot_fea \
--gcn_agg_accum=stack \
--mix_cpu_gpu \
--gpu 0
```
Results: RMSE=0.9380
Speed: 1.059s/epoch (Run with 70 epochs)
Speed: 1.046s/epoch (mix_cpu_gpu)
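
As noted above, with `--mix_cpu_gpu` the large weight W_r stays in CPU memory and only the rows a minibatch touches are copied to the GPU. A rough illustration of that pattern (hypothetical tensor names and sizes; not the actual `train_sampling.py` code):

```python
import torch as th

num_nodes, hidden_size = 100_000, 500                 # illustrative sizes only
W_r = th.randn(num_nodes, hidden_size).pin_memory()   # kept in (pinned) CPU memory

def gather_rows(node_ids, device):
    """Copy only the rows needed by the current minibatch to the GPU."""
    return W_r[node_ids].to(device, non_blocking=True)

# Example: fetch the rows for 1024 sampled nodes onto GPU 0 (if available).
if th.cuda.is_available():
    batch_rows = gather_rows(th.randint(0, num_nodes, (1024,)), th.device("cuda:0"))
```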

ml-100k, with feature
```bash
python3 train_sampling.py --data_name=ml-100k \
--gcn_agg_accum=stack \
--train_max_epoch 90 \
--gpu 0
```
Results: RMSE=0.9574

ml-1m, no feature
```bash
python3 train_sampling.py --data_name=ml-1m \
--gcn_agg_accum=sum \
--use_one_hot_fea \
--train_max_epoch 160 \
--gpu 0
```
ml-1m, no feature with mix_cpu_gpu run
```bash
python3 train_sampling.py --data_name=ml-1m \
--gcn_agg_accum=sum \
--use_one_hot_fea \
--train_max_epoch 60 \
--mix_cpu_gpu \
--gpu 0
```
Results: RMSE=0.8632
Speed: 7.852s/epoch (Run with 60 epochs)
Speed: 7.788s/epoch (mix_cpu_gpu)

ml-10m, no feature
```bash
python3 train_sampling.py --data_name=ml-10m \
--gcn_agg_accum=stack \
--gcn_dropout=0.3 \
--train_lr=0.001 \
--train_min_lr=0.0001 \
--train_max_epoch=60 \
--use_one_hot_fea \
--gen_r_num_basis_func=4 \
--gpu 0
```
ml-10m, no feature with mix_cpu_gpu run
```bash
python3 train_sampling.py --data_name=ml-10m \
--gcn_agg_accum=stack \
--gcn_dropout=0.3 \
--train_lr=0.001 \
--train_min_lr=0.0001 \
--train_max_epoch=60 \
--use_one_hot_fea \
--gen_r_num_basis_func=4 \
--mix_cpu_gpu \
--gpu 0
```
Results: RMSE=0.8050
Speed: 394.304s/epoch (Run with 60 epochs)
Speed: 408.749s/epoch (mix_cpu_gpu)
Testbed: EC2 p3.2xlarge instance
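
The sampling-based trainer works on minibatches of (user, movie, rating) edges instead of the whole graph. The toy loop below only illustrates that minibatch structure with a stand-in dot-product model; the real `train_sampling.py` builds neighbor-sampled GCMC subgraphs and uses GCMC's own decoder and loss rather than plain MSE:

```python
import torch as th
import torch.nn as nn

class DotDecoder(nn.Module):
    """Toy stand-in for the GCMC model: node embeddings plus a dot product."""
    def __init__(self, num_users, num_movies, dim=64):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, dim)
        self.movie_emb = nn.Embedding(num_movies, dim)

    def forward(self, uid, mid):
        return (self.user_emb(uid) * self.movie_emb(mid)).sum(dim=1)

def train_one_epoch(model, optimizer, uid, mid, ratings, batch_size, device):
    """Schematic minibatch loop over rating edges (not the actual script)."""
    perm = th.randperm(len(ratings))
    for start in range(0, len(ratings), batch_size):
        idx = perm[start:start + batch_size]
        pred = model(uid[idx].to(device), mid[idx].to(device))
        loss = ((pred - ratings[idx].to(device)) ** 2).mean()  # plain MSE for illustration
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```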

### Train with minibatch on multi-GPU
ml-100k, no feature
```bash
python train_sampling.py --data_name=ml-100k \
--gcn_agg_accum=stack \
--train_max_epoch 30 \
--train_lr 0.02 \
--use_one_hot_fea \
--gpu 0,1,2,3,4,5,6,7
```
ml-100k, no feature with mix_cpu_gpu run
```bash
python train_sampling.py --data_name=ml-100k \
--gcn_agg_accum=stack \
--train_max_epoch 30 \
--train_lr 0.02 \
--use_one_hot_fea \
--mix_cpu_gpu \
--gpu 0,1,2,3,4,5,6,7
```
Results: RMSE=0.9397
Speed: 1.202s/epoch (Run with only 30 epochs)
Speed: 1.245s/epoch (mix_cpu_gpu)

ml-100k, with feature
```bash
python train_sampling.py --data_name=ml-100k \
--gcn_agg_accum=stack \
--train_max_epoch 30 \
--gpu 0,1,2,3,4,5,6,7
```
Results: RMSE=0.9655
Speed: 1.265s/epoch (Run with 30 epochs)

ml-1m, no feature
```bash
python train_sampling.py --data_name=ml-1m \
--gcn_agg_accum=sum \
--train_max_epoch 40 \
--use_one_hot_fea \
--gpu 0,1,2,3,4,5,6,7
```
ml-1m, no feature with mix_cpu_gpu run
```bash
python train_sampling.py --data_name=ml-1m \
--gcn_agg_accum=sum \
--train_max_epoch 40 \
--use_one_hot_fea \
--mix_cpu_gpu \
--gpu 0,1,2,3,4,5,6,7
```
Results: RMSE=0.8621
Speed: 11.612s/epoch (Run with 40 epochs)
Speed: 12.483s/epoch (mix_cpu_gpu)

ml-10m, no feature
```bash
python train_sampling.py --data_name=ml-10m \
--gcn_agg_accum=stack \
--gcn_dropout=0.3 \
--train_lr=0.001 \
--train_min_lr=0.0001 \
--train_max_epoch=30 \
--use_one_hot_fea \
--gen_r_num_basis_func=4 \
--gpu 0,1,2,3,4,5,6,7
```
ml-10m, no feature with mix_cpu_gpu run
```bash
python train_sampling.py --data_name=ml-10m \
--gcn_agg_accum=stack \
--gcn_dropout=0.3 \
--train_lr=0.001 \
--train_min_lr=0.0001 \
--train_max_epoch=30 \
--use_one_hot_fea \
--gen_r_num_basis_func=4 \
--mix_cpu_gpu \
--gpu 0,1,2,3,4,5,6,7
```
Results: RMSE=0.8084
Speed: 632.868s/epoch (Run with 30 epochs)
Speed: 633.397s/epoch (mix_cpu_gpu)
Testbed: EC2 p3.16xlarge instance
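
Multi-GPU runs take a comma-separated device list. A common way to drive such a setting, and presumably what a multi-process trainer does (the exact mechanism here is an assumption), is to spawn one worker process per listed GPU:

```python
import torch as th
import torch.multiprocessing as mp

def run_worker(proc_id, device_ids):
    device = th.device(f"cuda:{device_ids[proc_id]}")
    # ... build the data loader and model on `device`, then train ...

if __name__ == "__main__":
    device_ids = [int(x) for x in "0,1,2,3,4,5,6,7".split(",")]  # mirrors --gpu 0,1,...,7
    mp.spawn(run_worker, args=(device_ids,), nprocs=len(device_ids))
```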

### Train with minibatch on CPU
ml-100k, no feature
```bash
python3 train_sampling.py --data_name=ml-100k \
--use_one_hot_fea \
--gcn_agg_accum=stack \
--gpu -1
```
Speed 1.591s/epoch
Testbed: EC2 r5.xlarge instance
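
The `--gpu -1` convention (a negative id selects the CPU) is commonly implemented with a device switch along these lines; this is a sketch of the likely flag handling, not a quote from the script:

```python
import torch as th

def device_from_flag(gpu: int) -> th.device:
    # Negative id selects CPU; otherwise use the given CUDA device.
    return th.device("cpu") if gpu < 0 else th.device(f"cuda:{gpu}")

assert str(device_from_flag(-1)) == "cpu"
assert str(device_from_flag(0)) == "cuda:0"
```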
30 changes: 20 additions & 10 deletions examples/pytorch/gcmc/data.py
@@ -5,8 +5,6 @@
import pandas as pd
import scipy.sparse as sp
import torch as th
-from torchtext import data
-from torchtext.vocab import GloVe

import dgl
from dgl.data.utils import download, extract_archive, get_download_dir
@@ -84,6 +82,8 @@ class MovieLens(object):
Dataset name. Could be "ml-100k", "ml-1m", "ml-10m"
device : torch.device
Device context
+mix_cpu_gpu : bool, optional
+If true, the ``user_feature`` attribute is stored in CPU
use_one_hot_fea : bool, optional
If true, the ``user_feature`` attribute is None, representing an one-hot identity
matrix. (Default: False)
@@ -96,7 +96,8 @@ class MovieLens(object):
Ratio of validation data

"""
-def __init__(self, name, device, use_one_hot_fea=False, symm=True,
+def __init__(self, name, device, mix_cpu_gpu=False,
+             use_one_hot_fea=False, symm=True,
test_ratio=0.1, valid_ratio=0.1):
self._name = name
self._device = device
@@ -164,8 +165,13 @@ def __init__(self, name, device, use_one_hot_fea=False, symm=True,
self.user_feature = None
self.movie_feature = None
else:
-self.user_feature = th.FloatTensor(self._process_user_fea()).to(device)
-self.movie_feature = th.FloatTensor(self._process_movie_fea()).to(device)
+# if mix_cpu_gpu, we put features in CPU
+if mix_cpu_gpu:
+    self.user_feature = th.FloatTensor(self._process_user_fea())
+    self.movie_feature = th.FloatTensor(self._process_movie_fea())
+else:
+    self.user_feature = th.FloatTensor(self._process_user_fea()).to(self._device)
+    self.movie_feature = th.FloatTensor(self._process_movie_fea()).to(self._device)
if self.user_feature is None:
self.user_feature_shape = (self.num_user, self.num_user)
self.movie_feature_shape = (self.num_movie, self.num_movie)
@@ -204,6 +210,7 @@ def _make_labels(ratings):
def _npairs(graph):
rst = 0
for r in self.possible_rating_values:
+r = str(r).replace('.', '_')
rst += graph.number_of_edges(str(r))
return rst
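# --- Editorial sketch (not part of data.py) --------------------------------
# The str(r).replace('.', '_') calls added above turn ratings into edge-type
# names without dots; ml-10m has fractional ratings such as 4.5, and a '.' in
# these names would be awkward once they are reused as keys elsewhere (this
# motivation is an assumption). The conversion itself is simply:
def to_etype_name(rating):
    return str(rating).replace('.', '_')   # e.g. 4.5 -> '4_5', 3 -> '3'
# ----------------------------------------------------------------------------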

@@ -245,9 +252,10 @@ def _generate_enc_graph(self, rating_pairs, rating_values, add_support=False):
ridx = np.where(rating_values == rating)
rrow = rating_row[ridx]
rcol = rating_col[ridx]
-bg = dgl.bipartite((rrow, rcol), 'user', str(rating), 'movie',
+rating = str(rating).replace('.', '_')
+bg = dgl.bipartite((rrow, rcol), 'user', rating, 'movie',
num_nodes=(self._num_user, self._num_movie))
-rev_bg = dgl.bipartite((rcol, rrow), 'movie', 'rev-%s' % str(rating), 'user',
+rev_bg = dgl.bipartite((rcol, rrow), 'movie', 'rev-%s' % rating, 'user',
num_nodes=(self._num_movie, self._num_user))
rating_graphs.append(bg)
rating_graphs.append(rev_bg)
@@ -267,7 +275,7 @@ def _calc_norm(x):
movie_ci = []
movie_cj = []
for r in self.possible_rating_values:
-r = str(r)
+r = str(r).replace('.', '_')
user_ci.append(graph['rev-%s' % r].in_degrees())
movie_ci.append(graph[r].in_degrees())
if self._symm:
@@ -494,6 +502,8 @@ def _process_movie_fea(self):
Generate movie features by concatenating embedding and the year

"""
+import torchtext
+
if self._name == 'ml-100k':
GENRES = GENRES_ML_100K
elif self._name == 'ml-1m':
@@ -503,8 +513,8 @@
else:
raise NotImplementedError

-TEXT = data.Field(tokenize='spacy')
-embedding = GloVe(name='840B', dim=300)
+TEXT = torchtext.data.Field(tokenize='spacy')
+embedding = torchtext.vocab.GloVe(name='840B', dim=300)

title_embedding = np.zeros(shape=(self.movie_info.shape[0], 300), dtype=np.float32)
release_years = np.zeros(shape=(self.movie_info.shape[0], 1), dtype=np.float32)