{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 446,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from random import randint\n",
    "import matplotlib.pyplot as plt \n",
    "from numpy.linalg import pinv\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 447,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def gen_data(n = 1100):\n",
    "    a = (np.random.rand() - 0.5)*10\n",
    "    b = (np.random.rand() - 0.5)*10\n",
    "    x = (np.random.rand(n) - 0.5)*10\n",
    "    y = (np.random.rand(n) - 0.5)*10\n",
    "    z = np.ones(n)\n",
    "    for i in range(0,n):\n",
    "        if a*x[i] + b < y[i]:\n",
    "            z[i] = - 1\n",
    "        if np.random.rand(1) < 0.1:\n",
    "            z[i] *= -1\n",
    "    color = ['red' if l == 0 else 'green' for l in z]\n",
    "    return np.stack([x, y],axis =1), z, a, b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 448,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def E(x,y,w):\n",
    "    misses = 0\n",
    "    for i in range(0, len(x)):\n",
    "        if y[i]*np.dot(x[i],w) < 0:\n",
    "            misses += 1\n",
    "    return misses/len(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 449,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def PocketPLA(x,y,iterations = 1000):\n",
    "    x = np.c_[x,np.ones(len(x))]\n",
    "    w = np.random.rand(3)\n",
    "    loss = 1\n",
    "    it = 0\n",
    "    for j in range(0,iterations):\n",
    "         for i in range(0,x.shape[0]):\n",
    "                it += 1\n",
    "                if it > iterations:\n",
    "                    return w\n",
    "                if y[i]*np.dot(x[i],w) < 0:\n",
    "                    new_w = w + x[i]*y[i]\n",
    "                    new_loss = E(x,y,new_w)\n",
    "                    if new_loss < loss:\n",
    "                        w = new_w\n",
    "                        loss = new_loss\n",
    "    return w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 450,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def E_lin(X,Y,w):\n",
    "    X  = np.c_[X,np.ones(len(X))]\n",
    "    sum = np.sum((np.dot(X,w) - Y)**2)/len(X)\n",
    "    return sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 451,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def linReg(x_train, y_train):\n",
    "    x_train  = np.c_[x_train,np.ones(len(x_train))]\n",
    "    X = np.dot(pinv(np.dot(np.transpose(x_train),x_train)),np.transpose(x_train))\n",
    "    w_lin = np.dot(X, y_train)\n",
    "    return w_lin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 452,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "elins = []\n",
    "eps = []\n",
    "for i in range (0, 100):\n",
    "    X, Y, a, b = gen_data(1100)\n",
    "    x = X[:100]\n",
    "    y = Y[:100]\n",
    "    w = PocketPLA(x, y)\n",
    "    elins.append(E(np.c_[x,np.ones(len(x))],y,w))\n",
    "    w_lin = linReg(x,y)\n",
    "    eps.append(E(np.c_[x,np.ones(len(x))],y,w_lin))\n",
    "    i+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 453,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x116dfa4e0>"
      ]
     },
     "execution_count": 453,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plt.scatter(range(0,100), elins, color = 'red')\n",
    "plt.scatter(range(0,100), eps , color = \"blue\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 455,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.13089999999999999"
      ]
     },
     "execution_count": 455,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.array(eps).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 456,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.14700000000000002"
      ]
     },
     "execution_count": 456,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.array(elins).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}