First public version of talk and code.

lschmelzeisen · Sep 24, 2018 · 0798fba · 0798fba
1 parent 006eee1
commit 0798fba
Show file tree

Hide file tree

Showing 28 changed files with 147,923 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+.idea
diff --git a/01_simple_linear_regression__disturbed_line.ipynb b/01_simple_linear_regression__disturbed_line.ipynb
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "# Print numpy arrays more readably (2 decimals, no scientific notation)\n",
+    "np.set_printoptions(precision=2, suppress=True, linewidth=100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def target_function(x):\n",
+    "    return x * 0.5 - 4\n",
+    "\n",
+    "\n",
+    "num_samples = 30\n",
+    "# x-values sampled uniformly at random from the half-open interval [-10, 10)\n",
+    "xs = np.random.uniform(low=-10, high=10, size=num_samples)\n",
+    "# Target value plus Gaussian noise (mean 0, standard deviation 1)\n",
+    "ys = target_function(xs) + np.random.normal(loc=0, scale=1, size=num_samples)\n",
+    "\n",
+    "data = np.array(list(zip(xs, ys)))\n",
+    "print('data:')\n",
+    "print(data)\n",
+    "\n",
+    "plt.figure(dpi=150)\n",
+    "plt.title('Data')\n",
+    "plt.xlabel('x')\n",
+    "plt.ylabel('y')\n",
+    "plt.plot([-12, 12], [target_function(-12), target_function(12)],\n",
+    "         color='#458588', label='target_function')\n",
+    "plt.scatter(xs, ys, color='#458588', label='data')\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hyperparameters\n",
+    "learning_rate = 0.005\n",
+    "num_epochs = 20\n",
+    "\n",
+    "# Model Definition\n",
+    "x = tf.placeholder(tf.float32)\n",
+    "y = tf.placeholder(tf.float32)\n",
+    "\n",
+    "m = tf.Variable(1.0)\n",
+    "b = tf.Variable(0.0)\n",
+    "\n",
+    "y_prediction = x * m + b\n",
+    "\n",
+    "loss = (y - y_prediction) ** 2\n",
+    "train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with tf.Session() as sess:\n",
+    "    sess.run(tf.global_variables_initializer())\n",
+    "\n",
+    "    # Training\n",
+    "    losses = []  # Storing losses so we can plot them later\n",
+    "    for epoch in range(num_epochs):\n",
+    "        np.random.shuffle(data)\n",
+    "        cumulative_loss = 0\n",
+    "        for _x, _y in data:\n",
+    "            _loss, _train_op = sess.run(\n",
+    "                (loss, train_op), feed_dict={x: _x, y: _y})\n",
+    "            cumulative_loss += _loss\n",
+    "        average_loss = cumulative_loss / len(data)\n",
+    "        print('Epoch: {}, Loss: {}'.format(epoch + 1, average_loss))\n",
+    "        losses.append(average_loss)\n",
+    "\n",
+    "    # Introspection\n",
+    "    print()\n",
+    "    _m, _b = sess.run([m, b])\n",
+    "    print('Estimated m:', _m)\n",
+    "    print('Estimated b:', _b)\n",
+    "\n",
+    "    # Prediction\n",
+    "    ys_actual = []\n",
+    "    ys_predicted = []\n",
+    "    for _x, _y in data:\n",
+    "        ys_actual.append(_y)\n",
+    "        ys_predicted.append(sess.run(y_prediction, feed_dict={x: _x}))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "plt.title('Loss over Time')\n",
+    "plt.xlabel('Epoch')\n",
+    "plt.ylabel('Loss')\n",
+    "plt.plot(range(len(losses)), losses, color='#458588')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "plt.title('Actual Function vs Estimated Function')\n",
+    "plt.xlabel('x')\n",
+    "plt.ylabel('y')\n",
+    "plt.plot([-12, 12], [target_function(-12), target_function(12)],\n",
+    "         color='#458588', label='target_function')\n",
+    "plt.scatter(xs, ys, color='#458588', label='data')\n",
+    "plt.plot([-12, 12], [-12 * _m + _b, 12 * _m + _b],\n",
+    "         color='#CC241D', label='estimated_function')\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "plt.title('Actual vs Predicted Data Points')\n",
+    "plt.xlabel('Actual y-value')\n",
+    "plt.ylabel('Predicted y-value')\n",
+    "plt.plot([min(ys_actual), max(ys_actual)], [min(ys_actual), max(ys_actual)],\n",
+    "         color='#1D2021', linestyle='--')\n",
+    "plt.scatter(ys_actual, ys_predicted, color='#458588')\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/02_simple_linear_regression__housing.ipynb b/02_simple_linear_regression__housing.ipynb
@@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import sklearn.datasets\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "# Print numpy arrays more readably (2 decimals, no scientific notation)\n",
+    "np.set_printoptions(precision=2, suppress=True, linewidth=100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "boston_housing = sklearn.datasets.load_boston()\n",
+    "# Uncomment the following line for a description of the dataset.\n",
+    "# print(boston_housing['DESCR'])\n",
+    "# Use only the 'RM' feature (average number of rooms per dwelling) for this experiment.\n",
+    "xs = boston_housing.data[:, list(boston_housing.feature_names).index('RM')]\n",
+    "ys = boston_housing.target\n",
+    "\n",
+    "data = list(zip(xs, ys))\n",
+    "\n",
+    "# Perform a 60% / 40% training/test split (in dataset order, without shuffling)\n",
+    "split_index = int(len(data) * 0.6)\n",
+    "train_data = data[:split_index]\n",
+    "test_data = data[split_index:]\n",
+    "print('Num training examples:', len(train_data))\n",
+    "print('Num testing examples:', len(test_data))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hyperparameters\n",
+    "learning_rate = 0.005\n",
+    "num_epochs = 100\n",
+    "\n",
+    "# Model Definition\n",
+    "x = tf.placeholder(tf.float32)\n",
+    "y = tf.placeholder(tf.float32)\n",
+    "\n",
+    "m = tf.Variable(1.0)\n",
+    "b = tf.Variable(0.0)\n",
+    "\n",
+    "y_prediction = x * m + b\n",
+    "\n",
+    "loss = (y - y_prediction) ** 2\n",
+    "train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with tf.Session() as sess:\n",
+    "    sess.run(tf.global_variables_initializer())\n",
+    "\n",
+    "    # Training\n",
+    "    time_before = time.time()\n",
+    "    losses = []  # Storing losses so we can plot them later\n",
+    "    for epoch in range(num_epochs):\n",
+    "        np.random.shuffle(train_data)\n",
+    "        cumulative_loss = 0\n",
+    "        for train_x, train_y in train_data:\n",
+    "            _loss, _train_op = sess.run(\n",
+    "                (loss, train_op), feed_dict={x: train_x, y: train_y})\n",
+    "            cumulative_loss += _loss\n",
+    "        average_loss = cumulative_loss / len(train_data)\n",
+    "        if epoch % 5 == 4:\n",
+    "            print('Epoch: {}, Loss: {}'.format(epoch + 1, average_loss))\n",
+    "        losses.append(average_loss)\n",
+    "    time_after = time.time()\n",
+    "    print('Training took {:.2f}s.'.format(time_after - time_before))\n",
+    "\n",
+    "    # Introspection\n",
+    "    print()\n",
+    "    _m, _b = sess.run([m, b])\n",
+    "    print('Estimated m:', _m)\n",
+    "    print('Estimated b:', _b)\n",
+    "\n",
+    "    # Prediction\n",
+    "    train_ys = []\n",
+    "    train_ys_prediction = []\n",
+    "    for train_x, train_y in train_data:\n",
+    "        train_ys.append(train_y)\n",
+    "        train_ys_prediction.append(\n",
+    "            sess.run(y_prediction, feed_dict={x: train_x}))\n",
+    "    train_ys = np.array(train_ys)\n",
+    "    train_ys_prediction = np.array(train_ys_prediction)\n",
+    "\n",
+    "    test_ys = []\n",
+    "    test_ys_prediction = []\n",
+    "    for test_x, test_y in test_data:\n",
+    "        test_ys.append(test_y)\n",
+    "        test_ys_prediction.append(sess.run(y_prediction, feed_dict={x: test_x}))\n",
+    "    test_ys = np.array(test_ys)\n",
+    "    test_ys_prediction = np.array(test_ys_prediction)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "plt.title('Loss over Time')\n",
+    "plt.xlabel('Epoch')\n",
+    "plt.ylabel('Loss')\n",
+    "plt.plot(range(len(losses)), losses, color='#458588')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_mean_squared_error = np.mean((train_ys - train_ys_prediction) ** 2)\n",
+    "test_mean_squared_error = np.mean((test_ys - test_ys_prediction) ** 2)\n",
+    "\n",
+    "print('Mean Squared Error on Training data:', train_mean_squared_error)\n",
+    "print('Mean Squared Error on Testing data:', test_mean_squared_error)\n",
+    "\n",
+    "plt.figure(dpi=150)\n",
+    "plt.title('Actual vs Predicted Data Points: Training Set')\n",
+    "plt.xlabel('Actual House Price')\n",
+    "plt.ylabel('Predicted House Price')\n",
+    "plt.plot([min(train_ys), max(train_ys)], [min(train_ys), max(train_ys)],\n",
+    "         color='#1D2021', linestyle='--')\n",
+    "plt.scatter(train_ys, train_ys_prediction, color='#458588')\n",
+    "plt.show()\n",
+    "\n",
+    "plt.figure(dpi=150)\n",
+    "plt.title('Actual vs Predicted Data Points: Testing Set')\n",
+    "plt.xlabel('Actual House Price')\n",
+    "plt.ylabel('Predicted House Price')\n",
+    "plt.plot([min(test_ys), max(test_ys)], [min(test_ys), max(test_ys)],\n",
+    "         color='#1D2021', linestyle='--')\n",
+    "plt.scatter(test_ys, test_ys_prediction, color='#CC241D')\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}