SVM.py
from __future__ import division, print_function
import numpy as np
import random as rnd
from csv_reader import load
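# csv_reader.load is a local helper module that is not shown in this file. Based
# on how its result is unpacked below, it is assumed to return a list of
# (features, label) pairs, e.g. [([0.5, 1.2], 1), ([3.1, 0.4], -1), ...].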

class SVM:
    """Simplified SMO-style support vector machine with linear and quadratic kernels."""

    def __init__(self, max_iter=10000, kernel_type='linear', C=1.0, epsilon=0.001):
        self.kernels = {
            'linear': self.kernel_linear,
            'quadratic': self.kernel_quadratic,
        }
        self.max_iter = max_iter
        self.kernel_type = kernel_type
        self.C = C
        self.epsilon = epsilon
    def fit(self, X, y):
        # Initialization
        n = X.shape[0]
        alpha = np.zeros(n)
        kernel = self.kernels[self.kernel_type]
        count = 0
        while True:
            count += 1
            alpha_prev = np.copy(alpha)
            for j in range(0, n):
                i = self.get_rnd_int(0, n - 1, j)  # pick a random index i != j
                x_i, x_j, y_i, y_j = X[i], X[j], y[i], y[j]
                # Second derivative of the objective along the constraint line
                # (eta in the SMO literature)
                k_ij = kernel(x_i, x_i) + kernel(x_j, x_j) - 2 * kernel(x_i, x_j)
                if k_ij == 0:
                    continue
                alpha_prime_j, alpha_prime_i = alpha[j], alpha[i]
                (L, H) = self.compute_L_H(self.C, alpha_prime_j, alpha_prime_i, y_j, y_i)
                # Compute w & b from the current alphas
                self.w = self.calc_w(alpha, y, X)
                self.b = self.calc_b(X, y, self.w)
                # Prediction errors for the two chosen points
                E_i = self.E(x_i, y_i, self.w, self.b)
                E_j = self.E(x_j, y_j, self.w, self.b)
                # Update alpha_j along the constraint line, then clip it to [L, H]
                alpha[j] = alpha_prime_j + float(y_j * (E_i - E_j)) / k_ij
                alpha[j] = max(alpha[j], L)
                alpha[j] = min(alpha[j], H)
                # Update alpha_i so the equality constraint is preserved
                alpha[i] = alpha_prime_i + y_i * y_j * (alpha_prime_j - alpha[j])
            # Check convergence
            diff = np.linalg.norm(alpha - alpha_prev)
            if diff < self.epsilon:
                break
            if count >= self.max_iter:
                print("Iteration number exceeded the max of %d iterations" % (self.max_iter))
                break
        # Compute final model parameters
        self.w = self.calc_w(alpha, y, X)
        self.b = self.calc_b(X, y, self.w)
        # Support vectors are the points with non-zero multipliers
        alpha_idx = np.where(alpha > 0)[0]
        support_vectors = X[alpha_idx, :]
        return support_vectors, count
    def predict(self, X):
        # Note: prediction always uses the linear decision function w.x + b,
        # even when a non-linear kernel was used during training.
        return self.h(X, self.w, self.b)

    def calc_b(self, X, y, w):
        b_tmp = y - np.dot(w.T, X.T)
        return np.mean(b_tmp)

    def calc_w(self, alpha, y, X):
        return np.dot(alpha * y, X)

    # Prediction
    def h(self, X, w, b):
        return np.sign(np.dot(w.T, X.T) + b).astype(int)

    # Prediction error: uses the thresholded output sign(f(x)) - y rather than
    # the raw margin f(x) - y of standard SMO
    def E(self, x_k, y_k, w, b):
        return self.h(x_k, w, b) - y_k
    # Box-constraint bounds [L, H] for the updated alpha_j (standard SMO bounds)
    def compute_L_H(self, C, alpha_prime_j, alpha_prime_i, y_j, y_i):
        if y_i != y_j:
            return (max(0, alpha_prime_j - alpha_prime_i), min(C, C - alpha_prime_i + alpha_prime_j))
        else:
            return (max(0, alpha_prime_i + alpha_prime_j - C), min(C, alpha_prime_i + alpha_prime_j))
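    # Worked example with C = 1.0: for y_i != y_j, alpha_j = 0.7, alpha_i = 0.2,
    # the bounds are L = max(0, 0.7 - 0.2) = 0.5 and H = min(1.0, 1.0 - 0.2 + 0.7) = 1.0,
    # so the updated alpha_j is clipped into [0.5, 1.0].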
    def get_rnd_int(self, a, b, z):
        i = z
        while i == z:
            i = rnd.randint(a, b)
        return i

    # Define kernels
    def kernel_linear(self, x1, x2):
        return np.dot(x1.T, x2)

    def kernel_quadratic(self, x1, x2):
        return np.dot(x1.T, x2) ** 2

# Calculate accuracy as the fraction of correctly classified examples
def calc_acc(y, y_hat):
    return np.mean(y == y_hat)
print("Input training data")
train = load()
print("Input test data")
test = load()
X_train = np.array([x for (x,y) in train]).astype(dtype=float)
Y_train = np.array([y for (x,y) in train]).astype(dtype=int)
X_test = np.array([x for (x,y) in test]).astype(dtype=float)
Y_test = np.array([y for (x,y) in test]).astype(dtype=int)
svm = SVM(kernel_type='quadratic')
sup_vec, iterations = svm.fit(X_train, Y_train)
y_hat = []
for i in range(len(X_test)):
y_hat.append(svm.predict(X_test[i])) # make predictions
acc = calc_acc(Y_test, y_hat)
print("Support vector count: %d" % (len(sup_vec)))
print("bias:\t\t%.3f" % (svm.b))
print("w:\t\t" + str(svm.w))
print("accuracy:\t%.3f" % (acc))
# print("Converged after %d iterations" % (iterations))