graph_kernel.py
import numpy as np
import gpflow
from gpflow import Parameter
from gpflow.inducing_variables.inducing_variables import InducingPointsBase
from gpflow import covariances as cov
import tensorflow as tf
from utils import sparse_mat_to_sparse_tensor, get_submatrix


class GraphPolynomial(gpflow.kernels.base.Kernel):
"""
GraphPolynomial kernel for node classification as introduced in
    Yin Cheng Ng, Nicolo Colombo, Ricardo Silva: "Bayesian Semi-supervised
Learning with Graph Gaussian Processes".
"""
def __init__(self, sparse_adj_mat, feature_mat, idx_train, degree=3.0,
variance=1.0, offset=1.0):
super().__init__(None)
self.degree = degree
self.offset = Parameter(offset, transform=gpflow.utilities.positive())
self.variance = Parameter(variance, transform=gpflow.utilities.positive())
        # Pre-compute the P-matrix for transforming the base covariance
        # matrix: P = D^{-1} (A + I), i.e. the adjacency matrix with added
        # self-loops, normalised by node degree (c.f. paper for details).
sparse_adj_mat[np.diag_indices(sparse_adj_mat.shape[0])] = 1.0
self.sparse_P = sparse_mat_to_sparse_tensor(sparse_adj_mat)
self.sparse_P = self.sparse_P / sparse_adj_mat.sum(axis=1)
self.feature_mat = feature_mat
# Compute data required for efficient computation of training
# covariance matrix.
(self.tr_feature_mat, self.tr_sparse_P,
self.idx_train_relative) = self._compute_train_data(
sparse_adj_mat, idx_train, feature_mat,
                tf.sparse.to_dense(self.sparse_P).numpy())

    def _compute_train_data(self, adj_matrix, train_idcs, feature_mat,
conv_mat):
"""
Computes all the variables required for computing the covariance matrix
for training in a computationally efficient way. The idea is to cut out
those features from the original feature matrix that are required for
predicting the training labels, which are the training nodes' features
        and their neighbors' features.
:param adj_matrix: Original dense adjacency matrix of the graph.
:param train_idcs: Indices of the training nodes.
:param feature_mat: Original dense feature matrix.
:param conv_mat: Original matrix used for computing the graph
convolutions.
:return: Cut outs of only the relevant nodes.
- Feature matrix containing features of only the "relevant" nodes,
i.e. the training nodes and their neighbors. Shape [num_rel,
num_feats].
- Convolutional matrix for only the relevant nodes. Shape [num_rel,
num_rel].
- Indices of the training nodes within the relevant nodes. Shape
[num_rel].
"""
sub_node_idcs = get_submatrix(adj_matrix, train_idcs)
# Compute indices of actual train nodes (excluding their neighbours)
# within the sub node indices
relative_train_idcs = np.isin(sub_node_idcs, train_idcs)
        relative_train_idcs = np.where(relative_train_idcs)[0]
return (feature_mat[sub_node_idcs],
conv_mat[sub_node_idcs, :][:, sub_node_idcs],
                relative_train_idcs)

    def K(self, X, Y=None, presliced=False):
        """
        Computes the covariance between nodes X and Y, which are passed in as
        node indices rather than feature vectors (see Kuf below).
        """
X = tf.reshape(tf.cast(X, tf.int32), [-1])
X2 = tf.reshape(tf.cast(Y, tf.int32), [-1]) if Y is not None else X
        base_cov = (self.variance
                    * tf.matmul(self.feature_mat, self.feature_mat,
                                transpose_b=True)
                    + self.offset) ** self.degree
cov = tf.sparse.sparse_dense_matmul(self.sparse_P, base_cov)
cov = tf.sparse.sparse_dense_matmul(self.sparse_P, cov, adjoint_b=True)
cov = tf.gather(tf.gather(cov, X, axis=0), X2, axis=1)
# print(f"Kff: {cov.shape}")
        return cov

    def K_diag(self, X, presliced=False):
        return tf.linalg.diag_part(self.K(X))

    def K_diag_tr(self):
        """
        Computes the diagonal of the training covariance matrix, using only
        the training nodes and their neighbours (see _compute_train_data).
        """
        base_cov = (self.variance
                    * tf.matmul(self.tr_feature_mat, self.tr_feature_mat,
                                transpose_b=True)
                    + self.offset) ** self.degree
        # The training sub-matrix of P is dense (it is cut out of the
        # densified P in __init__), so plain dense matmuls are used here.
        cov = tf.matmul(self.tr_sparse_P, base_cov)
        cov = tf.matmul(self.tr_sparse_P, cov, adjoint_b=True)
        cov = tf.gather(tf.gather(cov, self.idx_train_relative, axis=0),
                        self.idx_train_relative, axis=1)
        return tf.linalg.diag_part(cov)


class NodeInducingPoints(InducingPointsBase):
"""
Set of real-valued inducing points. See parent-class for details.
"""
    pass


@cov.Kuu.register(NodeInducingPoints, GraphPolynomial)
def Kuu_graph_polynomial(inducing_variable, kernel, jitter=0.0):
"""
Computes the covariance matrix between the inducing points (which are not
associated with any node).
:param inducing_variable: Set of inducing points of type
NodeInducingPoints.
:param kernel: Kernel of type GraphPolynomial.
    :param jitter: Small constant added to the diagonal for numerical
        stability.
    :return: Covariance matrix between the inducing variables.
"""
    Z = inducing_variable.Z
    cov = (kernel.variance * tf.matmul(Z, Z, transpose_b=True)
           + kernel.offset) ** kernel.degree
    # Add jitter to the diagonal for numerical stability (e.g. Cholesky).
    return cov + jitter * tf.eye(tf.shape(Z)[0], dtype=cov.dtype)


@cov.Kuf.register(NodeInducingPoints, GraphPolynomial, tf.Tensor)
def Kuf_graph_polynomial(inducing_variable, kernel, X):
"""
Computes the covariance matrix between inducing points (which are not
associated with any node) and normal inputs.
:param inducing_variable: Set of inducing points of type
NodeInducingPoints.
:param kernel: Kernel of type GraphPolynomial.
:param X: Normal inputs. Note, however, that to simplify the
implementation, we pass in the indices of the nodes rather than their
features directly.
:return: Covariance matrix between inducing variables and inputs.
"""
X = tf.reshape(tf.cast(X, tf.int32), [-1])
Z = inducing_variable.Z
    base_cov = (kernel.variance
                * tf.matmul(kernel.feature_mat, Z, adjoint_b=True)
                + kernel.offset) ** kernel.degree
cov = tf.sparse.sparse_dense_matmul(kernel.sparse_P, base_cov)
cov = tf.gather(tf.transpose(cov), X, axis=1)
return cov
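

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only, not part of the original
    # module). It assumes the `utils` helpers imported above accept a
    # scipy.sparse adjacency matrix and return a tf.SparseTensor and a
    # neighbourhood index array respectively, as their usage in
    # GraphPolynomial.__init__ implies; the graph, features and training
    # indices below are synthetic.
    import scipy.sparse as sp

    num_nodes, num_feats = 10, 4
    rng = np.random.default_rng(0)

    # Chain graph 0-1-2-...-9 as a LIL matrix so the diagonal assignment in
    # GraphPolynomial.__init__ works.
    adj = sp.lil_matrix((num_nodes, num_nodes))
    for i in range(num_nodes - 1):
        adj[i, i + 1] = 1.0
        adj[i + 1, i] = 1.0

    features = rng.normal(size=(num_nodes, num_feats))
    idx_train = np.array([0, 3, 7])

    kernel = GraphPolynomial(adj, features, idx_train, degree=3.0)
    # Inducing inputs live in feature space; here they are initialised from
    # the training nodes' features.
    inducing = NodeInducingPoints(features[idx_train].copy())

    # Inputs to the kernel are node indices, not feature vectors.
    X = idx_train.reshape(-1, 1).astype(np.float64)
    print("Kff:", kernel.K(X).shape)
    print("Kuu:", cov.Kuu(inducing, kernel, jitter=1e-6).shape)
    print("Kuf:", cov.Kuf(inducing, kernel, tf.convert_to_tensor(X)).shape)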