{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pocket PLA vs. linear regression for classification\n",
    "\n",
    "Each run draws a random target line, samples noisy +1/-1 labelled points around it,\n",
    "fits a pocket perceptron and a least-squares linear model on the first `N_TRAIN` points,\n",
    "and records the in-sample classification error of both. The errors of all runs are\n",
    "then plotted and averaged."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from random import randint\n",
    "import matplotlib.pyplot as plt\n",
    "from numpy.linalg import pinv\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "SEED = 0        # seed applied at the start of the experiment cell\n",
    "N_RUNS = 100    # number of independent experiments\n",
    "N_TOTAL = 1100  # points generated per experiment\n",
    "N_TRAIN = 100   # leading points of each data set used for fitting and scoring"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def gen_data(n = 1100, noise = 0.1):\n",
    "    \"\"\"Generate a noisy two-class data set split by a random line.\n",
    "\n",
    "    A slope `a` and an intercept `b` are drawn uniformly from [-5, 5), and\n",
    "    `n` points are drawn uniformly from [-5, 5) x [-5, 5). A point (x, y)\n",
    "    is labelled -1 when it lies strictly above the line y = a*x + b and\n",
    "    +1 otherwise. Every label is then flipped independently with\n",
    "    probability `noise`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n : int\n",
    "        Number of points to generate.\n",
    "    noise : float\n",
    "        Probability of flipping each label (0.1 by default).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    points : ndarray of shape (n, 2)\n",
    "    labels : ndarray of shape (n,) with values +1.0 / -1.0\n",
    "    a, b : float\n",
    "        Slope and intercept of the target line.\n",
    "    \"\"\"\n",
    "    a = (np.random.rand() - 0.5)*10\n",
    "    b = (np.random.rand() - 0.5)*10\n",
    "    x = (np.random.rand(n) - 0.5)*10\n",
    "    y = (np.random.rand(n) - 0.5)*10\n",
    "    z = np.where(a*x + b < y, -1.0, 1.0)\n",
    "    # Label noise: flip a random subset of the labels.\n",
    "    flipped = np.random.rand(n) < noise\n",
    "    z[flipped] *= -1\n",
    "    return np.stack([x, y], axis=1), z, a, b"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Error measures and learners"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def E(x,y,w):\n",
    "    \"\"\"Return the fraction of points misclassified by the weights `w`.\n",
    "\n",
    "    `x` must already contain every column that `w` multiplies (the callers\n",
    "    in this notebook append a column of ones for the bias), and `y` holds\n",
    "    the +1/-1 labels. A point counts as a miss only when y_i * (x_i . w)\n",
    "    is strictly negative, so points lying exactly on the boundary are not\n",
    "    counted. An empty data set has error 0.0.\n",
    "    \"\"\"\n",
    "    x = np.asarray(x)\n",
    "    if len(x) == 0:\n",
    "        return 0.0\n",
    "    margins = np.asarray(y)*np.dot(x, w)\n",
    "    return np.count_nonzero(margins < 0)/len(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def PocketPLA(x,y,iterations = 1000):\n",
    "    \"\"\"Fit a linear classifier with the pocket perceptron algorithm.\n",
    "\n",
    "    A column of ones is appended to `x` for the bias term and the weights\n",
    "    start from uniform random values in [0, 1). The points are visited\n",
    "    cyclically; whenever the visited point is misclassified the usual\n",
    "    perceptron update w <- w + y_i * x_i is applied, and the updated\n",
    "    weights replace the pocketed ones if they give a strictly lower\n",
    "    error `E` on the whole data set.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x : ndarray of shape (n, d)\n",
    "        Input points, without the bias column.\n",
    "    y : ndarray of shape (n,)\n",
    "        Labels, +1 or -1.\n",
    "    iterations : int\n",
    "        Maximum number of point visits.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ndarray of shape (d + 1,)\n",
    "        The pocketed weights, bias last.\n",
    "    \"\"\"\n",
    "    x = np.c_[x,np.ones(len(x))]\n",
    "    n = len(x)\n",
    "    w = np.random.rand(x.shape[1])\n",
    "    if n == 0:\n",
    "        return w\n",
    "    # The pocket starts with the error of the initial weights, so a worse\n",
    "    # first update can never replace them.\n",
    "    best_w = w\n",
    "    best_loss = E(x,y,w)\n",
    "    for visit in range(iterations):\n",
    "        if best_loss == 0:\n",
    "            break\n",
    "        i = visit % n\n",
    "        if y[i]*np.dot(x[i],w) < 0:\n",
    "            # `w + ...` builds a new array, so `best_w` is never mutated.\n",
    "            w = w + x[i]*y[i]\n",
    "            loss = E(x,y,w)\n",
    "            if loss < best_loss:\n",
    "                best_w = w\n",
    "                best_loss = loss\n",
    "    return best_w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def E_lin(X,Y,w):\n",
    "    \"\"\"Return the mean squared error of the linear model `w` on (X, Y).\n",
    "\n",
    "    A column of ones is appended to `X` here, so `w` must have one more\n",
    "    entry than `X` has columns (bias last).\n",
    "    \"\"\"\n",
    "    X = np.c_[X,np.ones(len(X))]\n",
    "    residuals = np.dot(X,w) - Y\n",
    "    return np.sum(residuals**2)/len(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def linReg(x_train, y_train):\n",
    "    \"\"\"Return the least-squares weights for `y_train` given `x_train`.\n",
    "\n",
    "    A column of ones is appended to `x_train` for the bias term, so the\n",
    "    result has one more entry than `x_train` has columns (bias last).\n",
    "    The solution is pinv(X) . y, which equals pinv(X^T X) . X^T . y\n",
    "    without forming X^T X explicitly.\n",
    "    \"\"\"\n",
    "    x_train = np.c_[x_train,np.ones(len(x_train))]\n",
    "    w_lin = np.dot(pinv(x_train), y_train)\n",
    "    return w_lin"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experiment\n",
    "\n",
    "Both models are fitted and scored on the same `N_TRAIN` points, so the reported errors\n",
    "are in-sample errors. The remaining generated points are not used in this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.random.seed(SEED)  # makes this cell reproducible when re-run on its own\n",
    "\n",
    "elins = []  # in-sample classification error of linear regression, one per run\n",
    "eps = []    # in-sample classification error of the pocket perceptron, one per run\n",
    "for _ in range(N_RUNS):\n",
    "    X, Y, a, b = gen_data(N_TOTAL)\n",
    "    x = X[:N_TRAIN]\n",
    "    y = Y[:N_TRAIN]\n",
    "    x_aug = np.c_[x,np.ones(len(x))]  # E expects the bias column to be present\n",
    "    w = PocketPLA(x, y)\n",
    "    eps.append(E(x_aug,y,w))\n",
    "    w_lin = linReg(x,y)\n",
    "    elins.append(E(x_aug,y,w_lin))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "ax.scatter(range(N_RUNS), elins, color = 'red', label = 'linear regression')\n",
    "ax.scatter(range(N_RUNS), eps, color = 'blue', label = 'pocket PLA')\n",
    "ax.set_xlabel('run')\n",
    "ax.set_ylabel('in-sample classification error')\n",
    "ax.set_title('Classification error per run')\n",
    "ax.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Mean in-sample error of the pocket perceptron over all runs\n",
    "np.array(eps).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Mean in-sample error of linear regression over all runs\n",
    "np.array(elins).mean()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}