1
- """
2
- Wikipedia page-page networks on two topics: chameleons and squirrels.
3
- """
1
+ """Datasets introduced in the Geom-GCN paper."""
4
2
import os
5
3
6
4
import numpy as np
10
8
from .utils import _get_dgl_url
11
9
12
10
13
- class WikiNetworkDataset (DGLBuiltinDataset ):
14
- r"""Wikipedia page-page networks from `Multi-scale Attributed
15
- Node Embedding <https://arxiv.org/abs/1909.13021>`__ and later modified by
11
+ class GeomGCNDataset (DGLBuiltinDataset ):
12
+ r"""Datasets introduced in
16
13
`Geom-GCN: Geometric Graph Convolutional Networks
17
- <https://arxiv.org/abs/2002.05287>`
14
+ <https://arxiv.org/abs/2002.05287>`__
18
15
19
16
Parameters
20
17
----------
@@ -34,7 +31,7 @@ class WikiNetworkDataset(DGLBuiltinDataset):
34
31
35
32
def __init__ (self , name , raw_dir , force_reload , verbose , transform ):
36
33
url = _get_dgl_url (f"dataset/{ name } .zip" )
37
- super (WikiNetworkDataset , self ).__init__ (
34
+ super (GeomGCNDataset , self ).__init__ (
38
35
name = name ,
39
36
url = url ,
40
37
raw_dir = raw_dir ,
@@ -106,11 +103,11 @@ def num_classes(self):
106
103
return self ._num_classes
107
104
108
105
109
- class ChameleonDataset (WikiNetworkDataset ):
106
+ class ChameleonDataset (GeomGCNDataset ):
110
107
r"""Wikipedia page-page network on chameleons from `Multi-scale Attributed
111
108
Node Embedding <https://arxiv.org/abs/1909.13021>`__ and later modified by
112
109
`Geom-GCN: Geometric Graph Convolutional Networks
113
- <https://arxiv.org/abs/2002.05287>`
110
+ <https://arxiv.org/abs/2002.05287>`__
114
111
115
112
Nodes represent articles from the English Wikipedia, edges reflect mutual
116
113
links between them. Node features indicate the presence of particular nouns
@@ -182,11 +179,11 @@ def __init__(
182
179
)
183
180
184
181
185
- class SquirrelDataset (WikiNetworkDataset ):
182
+ class SquirrelDataset (GeomGCNDataset ):
186
183
r"""Wikipedia page-page network on squirrels from `Multi-scale Attributed
187
184
Node Embedding <https://arxiv.org/abs/1909.13021>`__ and later modified by
188
185
`Geom-GCN: Geometric Graph Convolutional Networks
189
- <https://arxiv.org/abs/2002.05287>`
186
+ <https://arxiv.org/abs/2002.05287>`__
190
187
191
188
Nodes represent articles from the English Wikipedia, edges reflect mutual
192
189
links between them. Node features indicate the presence of particular nouns
@@ -256,3 +253,155 @@ def __init__(
256
253
verbose = verbose ,
257
254
transform = transform ,
258
255
)
256
+
257
+
258
class CornellDataset(GeomGCNDataset):
    r"""Cornell web-page graph, a subset of
    `WebKB <http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-11/www/wwkb/>`__,
    in the version later modified by `Geom-GCN: Geometric Graph Convolutional
    Networks <https://arxiv.org/abs/2002.05287>`__

    Each node is a web page and each edge is a hyperlink between two pages.
    Node features are the bag-of-words representation of the pages. Every
    page is manually classified into one of five categories: student,
    project, course, staff, and faculty.

    Statistics:

    - Nodes: 183
    - Edges: 298
    - Number of Classes: 5
    - 10 train/val/test splits

        - Train: 87
        - Val: 59
        - Test: 37

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Notes
    -----
    The graph does not come with edges for both directions.

    Examples
    --------

    >>> from dgl.data import CornellDataset
    >>> dataset = CornellDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get data split
    >>> train_mask = g.ndata["train_mask"]
    >>> val_mask = g.ndata["val_mask"]
    >>> test_mask = g.ndata["test_mask"]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self,
        raw_dir=None,
        force_reload=False,
        verbose=True,
        transform=None,
    ):
        # The dataset name is fixed here; every other option is handed to
        # the GeomGCNDataset constructor unchanged.
        super().__init__(
            name="cornell",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
332
+
333
+
334
class TexasDataset(GeomGCNDataset):
    r"""Texas web-page graph, a subset of
    `WebKB <http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-11/www/wwkb/>`__,
    in the version later modified by `Geom-GCN: Geometric Graph Convolutional
    Networks <https://arxiv.org/abs/2002.05287>`__

    Each node is a web page and each edge is a hyperlink between two pages.
    Node features are the bag-of-words representation of the pages. Every
    page is manually classified into one of five categories: student,
    project, course, staff, and faculty.

    Statistics:

    - Nodes: 183
    - Edges: 325
    - Number of Classes: 5
    - 10 train/val/test splits

        - Train: 87
        - Val: 59
        - Test: 37

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Notes
    -----
    The graph does not come with edges for both directions.

    Examples
    --------

    >>> from dgl.data import TexasDataset
    >>> dataset = TexasDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get data split
    >>> train_mask = g.ndata["train_mask"]
    >>> val_mask = g.ndata["val_mask"]
    >>> test_mask = g.ndata["test_mask"]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self,
        raw_dir=None,
        force_reload=False,
        verbose=True,
        transform=None,
    ):
        # The dataset name is fixed here; every other option is handed to
        # the GeomGCNDataset constructor unchanged.
        super().__init__(
            name="texas",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
0 commit comments