Skip to content

Commit 0220c79

Browse files
committed
SVD reconstruction mean squared error and orthogonality error
1 parent 5e6e0e2 commit 0220c79

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

recommend.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from __future__ import division
2+
##
3+
import numpy as np
4+
#from prettyplotlib import plt
5+
from scipy.io import mmread
6+
##
7+
#from incremental_svd2 import incremental_SVD
8+
#from svd_reconstruct import single_dot
9+
10+
11+
def preprocess_recommender(m):
    """Placeholder for preparing matrix ``m`` for the recommender.

    Not implemented yet: ``m`` is ignored and ``None`` is returned.
    The planned steps are:

    * remove sparsity by filling the matrix with the average movie rating;
    * normalize each entry by the customer's average rating.
    """
    return None
15+
16+
if __name__ == '__main__':
    # Read the training matrix from its Matrix Market file and densify it.
    train_sparse = mmread('subset_train.mtx')
    train = np.matrix(train_sparse.todense())
    # The test data is loaded here but not used anywhere else in this script.
    test = np.loadtxt('subset_test.txt')
    # Single-argument parenthesized print: same output under Python 2.
    size_msg = 'Using matrix of size {}'.format(train.shape)
    print(size_msg)

svd_reconstruct.py

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from __future__ import division
2+
##
3+
import numpy as np
4+
import scipy.linalg
5+
from prettyplotlib import plt
6+
from scipy.io import mmread
7+
from sklearn.metrics import mean_squared_error
8+
##
9+
from incremental_svd2 import incremental_SVD
10+
11+
12+
def single_dot(u, svT, x, y):
    """Return the dot product of row ``y`` of ``u`` and column ``x`` of ``svT``.

    This is entry ``(y, x)`` of the product ``u.dot(svT)``, computed without
    forming the whole product.

    Parameters:
        u:   2-D array/matrix supplying the row (indexed by ``y``).
        svT: 2-D array/matrix supplying the column (indexed by ``x``).
        x:   column index into ``svT``.
        y:   row index into ``u``.
    """
    return u[y, :].dot(svT[:, x])
16+
17+
18+
def check_orthogonality(A):
    """Return how far the columns of ``A`` are from having unit norm.

    Computes ``trace(|A^T A - I|)``, i.e. the sum over columns of
    ``| ||column||^2 - 1 |``.  The result is 0 when every column has unit
    length.

    NOTE: because only the trace is taken, the off-diagonal entries of
    ``A^T A`` (the inner products between *different* columns) do not
    contribute to the returned value.

    Parameters:
        A: 2-D array/matrix of shape (rows, cols).

    Returns:
        A non-negative scalar error.
    """
    gram = A.T.dot(A)
    # A^T A is cols x cols, so the identity must be sized by the column
    # count.  Sizing it by min(A.shape) breaks (shape mismatch) as soon as
    # A has more columns than rows.  dtype=int keeps the result dtype the
    # same as subtracting an integer diagonal.
    identity = np.eye(A.shape[-1], dtype=int)
    return np.trace(abs(gram - identity))
20+
21+
if __name__ == '__main__':
    # Load the training matrix and work on its top-left 200x100 corner only.
    train = np.matrix(mmread('subset_train.mtx').todense())
    train = train[0:200, 0:100]
    print('Using matrix of size {}'.format(train.shape))

    # ------------------------------------------------------------------
    # Exact SVD: reconstruction error of rank-k truncations.
    # ------------------------------------------------------------------
    print('Testing SVD')
    svdX = []
    svdY = []
    orthoX = []
    orthoY = []
    u, s, vT = scipy.linalg.svd(train)
    n_rows, n_cols = u.shape[0], vT.shape[1]
    # Sanity check: the untruncated factors must reproduce the input.
    assert np.allclose(train, u.dot(scipy.linalg.diagsvd(s, n_rows, n_cols).dot(vT)))
    # u is never truncated in the loop below, so its orthogonality error is
    # the same for every k; compute it once instead of once per iteration.
    exact_ortho_err = check_orthogonality(u)
    # See the loss in performance as we perform low-rank approximations
    for k in range(1, 100):
        # Keep the k leading singular values and zero out the rest.
        low_s = np.zeros_like(s)
        low_s[:k] = s[:k]
        reconstruct = u.dot(scipy.linalg.diagsvd(low_s, n_rows, n_cols).dot(vT))
        err = mean_squared_error(train, reconstruct)
        print('Exact SVD with low-rank approximation {}'.format(k))
        #print err
        #print
        svdX.append(k)
        svdY.append(err)
        orthoX.append(k)
        orthoY.append(exact_ortho_err)
    plt.plot(svdX, svdY, label="SVD", color='black', linewidth=2, linestyle='--')

    # ------------------------------------------------------------------
    # Incremental SVD: same measurements for several block sizes.
    # ------------------------------------------------------------------
    print('')
    print('Testing incremental SVD')
    # One [label, ks, orthogonality errors] entry per block size; drawn
    # later on the second figure.
    incr_ortho = []
    for num in range(100, 1001, 300):
        print('... with block size of {}'.format(num))
        X, Y = [], []
        incr_orthoY = []
        for k in range(1, 101, 1):
            if k % 25 == 0:
                print(' ... up to k={}'.format(k))
            u, s, vT = incremental_SVD(train, k, num)
            reconstruct = u.dot(s.dot(vT))
            X.append(k)
            Y.append(mean_squared_error(train, reconstruct))
            incr_orthoY.append(check_orthogonality(u))
        incr_ortho.append(['iSVD n={}'.format(num), X, incr_orthoY])
        plt.plot(X, Y, label='iSVD n={}'.format(num))

    # Disabled check, kept for reference:
    #
    # print 'Testing raw SVD => exact reconstruction'
    # svT = scipy.linalg.diagsvd(s, u.shape[0], vT.shape[1]).dot(vT)
    # for y in xrange(train.shape[0]):
    #     for x in xrange(train.shape[1]):
    #         colU = u[y, :]
    #         rowV = svT[:, x]
    #         assert np.allclose(train[y, x], single_dot(u, svT, x, y))

    ## Figure 1: reconstruction error
    plt.title('SVD reconstruction error on {}x{} matrix'.format(*train.shape))
    plt.xlabel('Low rank approximation')
    plt.ylabel('Mean Squared Error')
    plt.ylim(0, max(svdY))
    plt.legend(loc='best')
    plt.savefig('reconstruct_error_{}x{}.pdf'.format(*train.shape))
    plt.show(block=True)

    ## Figure 2: orthogonality error
    # Plot the exact-SVD *orthogonality* series here.  Using svdX/svdY would
    # draw the mean-squared-error curve on the orthogonality figure.
    plt.plot(orthoX, orthoY, label="SVD", color='black', linewidth=2, linestyle='--')
    for label, X, Y in incr_ortho:
        plt.plot(X, Y, label=label)
    plt.title('SVD orthogonality error on {}x{} matrix'.format(*train.shape))
    plt.xlabel('Low rank approximation')
    plt.ylabel('Orthogonality error')
    #plt.ylim(0, max(orthoY))
    plt.legend(loc='best')
    plt.savefig('reconstruct_ortho_{}x{}.pdf'.format(*train.shape))
    plt.show(block=True)

0 commit comments

Comments
 (0)