Skip to content

Commit

Permalink
First public version of talk and code.
Browse files Browse the repository at this point in the history
  • Loading branch information
lschmelzeisen committed Sep 24, 2018
1 parent 006eee1 commit 0798fba
Show file tree
Hide file tree
Showing 28 changed files with 147,923 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.idea
165 changes: 165 additions & 0 deletions 01_simple_linear_regression__disturbed_line.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"\n",
"# Prints numpy arrays nicer\n",
"np.set_printoptions(precision=2, suppress=True, linewidth=100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def target_function(x):\n",
"    \"\"\"Return the noise-free target line ``0.5 * x - 4`` evaluated at ``x``.\"\"\"\n",
"    slope, intercept = 0.5, -4\n",
"    return x * slope + intercept\n",
"\n",
"\n",
"num_samples = 30\n",
"# Fixed seed so that 'Restart & Run All' always produces the same samples\n",
"# (and, because the global NumPy generator is seeded, the same shuffles later on).\n",
"random_seed = 0\n",
"np.random.seed(random_seed)\n",
"# Randomly sampled values in [-10, 10)\n",
"xs = np.random.uniform(low=-10, high=10, size=num_samples)\n",
"# Intended target value plus random noise (standard normal)\n",
"ys = target_function(xs) + np.random.normal(loc=0, scale=1, size=num_samples)\n",
"\n",
"# One (x, y) row per sample\n",
"data = np.column_stack((xs, ys))\n",
"print('data:')\n",
"print(data)\n",
"\n",
"fig, ax = plt.subplots(dpi=150)\n",
"ax.set_title('Data')\n",
"ax.set_xlabel('x')\n",
"ax.set_ylabel('y')\n",
"# Draw the noise-free line between two end points just outside the sampled x-range\n",
"line_xs = [-12, 12]\n",
"ax.plot(line_xs, [target_function(x_value) for x_value in line_xs],\n",
"        color='#458588', label='target_function')\n",
"ax.scatter(xs, ys, color='#458588', label='data')\n",
"ax.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hyperparameters\n",
"learning_rate = 0.005  # step size of each gradient-descent update\n",
"num_epochs = 20  # number of passes over the data\n",
"\n",
"# Start from an empty graph so that re-running this cell does not keep adding\n",
"# duplicate placeholders, variables and optimizer ops to the default graph.\n",
"tf.reset_default_graph()\n",
"\n",
"# Model Definition\n",
"# Inputs: no shape is fixed, so single values as well as whole arrays can be fed.\n",
"x = tf.placeholder(tf.float32)\n",
"y = tf.placeholder(tf.float32)\n",
"\n",
"# Trainable parameters of the line: slope m and intercept b\n",
"m = tf.Variable(1.0)\n",
"b = tf.Variable(0.0)\n",
"\n",
"y_prediction = x * m + b\n",
"\n",
"# Squared error between target and prediction\n",
"loss = (y - y_prediction) ** 2\n",
"train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
"    sess.run(tf.global_variables_initializer())\n",
"\n",
"    # Training: one gradient-descent update per sample\n",
"    losses = []  # Average loss of every epoch, stored so we can plot it later\n",
"    for epoch in range(num_epochs):\n",
"        # Visit the samples in a new random order each epoch (shuffles in place)\n",
"        np.random.shuffle(data)\n",
"        cumulative_loss = 0\n",
"        for _x, _y in data:\n",
"            # Fetching train_op applies the update; only the loss value is needed\n",
"            _loss, _ = sess.run(\n",
"                (loss, train_op), feed_dict={x: _x, y: _y})\n",
"            cumulative_loss += _loss\n",
"        average_loss = cumulative_loss / len(data)\n",
"        print('Epoch: {}, Loss: {}'.format(epoch + 1, average_loss))\n",
"        losses.append(average_loss)\n",
"\n",
"    # Introspection\n",
"    print()\n",
"    _m, _b = sess.run([m, b])\n",
"    print('Estimated m:', _m)\n",
"    print('Estimated b:', _b)\n",
"\n",
"    # Prediction: evaluate the fitted line for all samples in a single run\n",
"    # instead of one session call per sample.\n",
"    # Copy so that a later in-place shuffle of data cannot reorder the targets.\n",
"    ys_actual = data[:, 1].copy()\n",
"    ys_predicted = sess.run(y_prediction, feed_dict={x: data[:, 0]})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(dpi=150)\n",
"plt.title('Loss over Time')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Loss')\n",
"# Number the epochs from 1 so the x-axis matches the 'Epoch: N' lines printed during training\n",
"epochs = range(1, len(losses) + 1)\n",
"plt.plot(epochs, losses, color='#458588')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots(dpi=150)\n",
"ax.set_title('Actual Function vs Estimated Function')\n",
"ax.set_xlabel('x')\n",
"ax.set_ylabel('y')\n",
"# Both lines are drawn between the same two end points\n",
"x_left, x_right = -12, 12\n",
"ax.plot([x_left, x_right], [target_function(x_left), target_function(x_right)],\n",
"        color='#458588', label='target_function')\n",
"ax.scatter(xs, ys, color='#458588', label='data')\n",
"# Line described by the learned slope _m and intercept _b\n",
"ax.plot([x_left, x_right], [x_left * _m + _b, x_right * _m + _b],\n",
"        color='#CC241D', label='estimated_function')\n",
"ax.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots(dpi=150)\n",
"ax.set_title('Actual vs Predicted Data Points')\n",
"ax.set_xlabel('Actual y-value')\n",
"ax.set_ylabel('Predicted y-value')\n",
"# Dashed diagonal: a point lying on it was predicted exactly\n",
"y_low, y_high = min(ys_actual), max(ys_actual)\n",
"ax.plot([y_low, y_high], [y_low, y_high], color='#1D2021', linestyle='--')\n",
"ax.scatter(ys_actual, ys_predicted, color='#458588')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
171 changes: 171 additions & 0 deletions 02_simple_linear_regression__housing.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import sklearn.datasets\n",
"import tensorflow as tf\n",
"\n",
"# Prints numpy arrays nicer\n",
"np.set_printoptions(precision=2, suppress=True, linewidth=100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NOTE: load_boston() was deprecated in scikit-learn 1.0 and removed in 1.2,\n",
"# so this cell needs an older scikit-learn release to run.\n",
"boston_housing = sklearn.datasets.load_boston()\n",
"# Uncomment the following line for a description of the dataset.\n",
"# print(boston_housing['DESCR'])\n",
"# Consider only the number-of-rooms feature for this experiment.\n",
"xs = boston_housing.data[:, list(boston_housing.feature_names).index('RM')]\n",
"ys = boston_housing.target\n",
"\n",
"data = list(zip(xs, ys))\n",
"\n",
"# Shuffle once with a fixed seed before splitting: a contiguous split is only\n",
"# representative if the rows happen to be stored in random order, and the seed\n",
"# keeps the split (and the later training shuffles) reproducible.\n",
"random_seed = 0\n",
"np.random.seed(random_seed)\n",
"np.random.shuffle(data)\n",
"\n",
"# Perform 60% / 40% training/test split\n",
"split_index = int(len(data) * 0.6)\n",
"train_data = data[:split_index]\n",
"test_data = data[split_index:]\n",
"print('Num training examples:', len(train_data))\n",
"print('Num testing examples:', len(test_data))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hyperparameters\n",
"learning_rate = 0.005  # step size of each gradient-descent update\n",
"num_epochs = 100  # number of passes over the training data\n",
"\n",
"# Start from an empty graph so that re-running this cell does not keep adding\n",
"# duplicate placeholders, variables and optimizer ops to the default graph.\n",
"tf.reset_default_graph()\n",
"\n",
"# Model Definition\n",
"# Inputs: no shape is fixed, so single values as well as whole arrays can be fed.\n",
"x = tf.placeholder(tf.float32)\n",
"y = tf.placeholder(tf.float32)\n",
"\n",
"# Trainable parameters of the line: slope m and intercept b\n",
"m = tf.Variable(1.0)\n",
"b = tf.Variable(0.0)\n",
"\n",
"y_prediction = x * m + b\n",
"\n",
"# Squared error between target and prediction\n",
"loss = (y - y_prediction) ** 2\n",
"train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
"    sess.run(tf.global_variables_initializer())\n",
"\n",
"    # Training: one gradient-descent update per training sample\n",
"    time_before = time.time()\n",
"    losses = []  # Average loss of every epoch, stored so we can plot it later\n",
"    for epoch in range(num_epochs):\n",
"        # Visit the samples in a new random order each epoch (shuffles in place)\n",
"        np.random.shuffle(train_data)\n",
"        cumulative_loss = 0\n",
"        for train_x, train_y in train_data:\n",
"            # Fetching train_op applies the update; only the loss value is needed\n",
"            _loss, _ = sess.run(\n",
"                (loss, train_op), feed_dict={x: train_x, y: train_y})\n",
"            cumulative_loss += _loss\n",
"        average_loss = cumulative_loss / len(train_data)\n",
"        # Only report every fifth epoch to keep the output short\n",
"        if epoch % 5 == 4:\n",
"            print('Epoch: {}, Loss: {}'.format(epoch + 1, average_loss))\n",
"        # Recorded for every epoch (not only the printed ones) so that the loss\n",
"        # plot gets one point per epoch\n",
"        losses.append(average_loss)\n",
"    time_after = time.time()\n",
"    print('Training took {:.2f}s.'.format(time_after - time_before))\n",
"\n",
"    # Introspection\n",
"    print()\n",
"    _m, _b = sess.run([m, b])\n",
"    print('Estimated m:', _m)\n",
"    print('Estimated b:', _b)\n",
"\n",
"    # Prediction: evaluate the fitted line for each whole split in a single run\n",
"    # instead of one session call per sample. Column 0 holds x, column 1 holds y.\n",
"    train_samples = np.asarray(train_data)\n",
"    train_ys = train_samples[:, 1]\n",
"    train_ys_prediction = sess.run(\n",
"        y_prediction, feed_dict={x: train_samples[:, 0]})\n",
"\n",
"    test_samples = np.asarray(test_data)\n",
"    test_ys = test_samples[:, 1]\n",
"    test_ys_prediction = sess.run(\n",
"        y_prediction, feed_dict={x: test_samples[:, 0]})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(dpi=150)\n",
"plt.title('Loss over Time')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Loss')\n",
"# Number the epochs from 1 to match the 'Epoch: N' lines printed during training\n",
"# (assumes losses holds one entry per epoch - TODO confirm against the training cell)\n",
"epochs = range(1, len(losses) + 1)\n",
"plt.plot(epochs, losses, color='#458588')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_actual_vs_predicted(actual, predicted, title, color):\n",
"    \"\"\"Scatter predicted against actual house prices in a new figure.\n",
"\n",
"    A dashed diagonal spanning the range of ``actual`` is drawn as reference;\n",
"    a point lying on it was predicted exactly.\n",
"    \"\"\"\n",
"    plt.figure(dpi=150)\n",
"    plt.title(title)\n",
"    plt.xlabel('Actual House Price')\n",
"    plt.ylabel('Predicted House Price')\n",
"    lowest, highest = min(actual), max(actual)\n",
"    plt.plot([lowest, highest], [lowest, highest],\n",
"             color='#1D2021', linestyle='--')\n",
"    plt.scatter(actual, predicted, color=color)\n",
"    plt.show()\n",
"\n",
"\n",
"train_mean_squared_error = np.mean((train_ys - train_ys_prediction) ** 2)\n",
"test_mean_squared_error = np.mean((test_ys - test_ys_prediction) ** 2)\n",
"\n",
"print('Mean Squared Error on Training data:', train_mean_squared_error)\n",
"print('Mean Squared Error on Testing data:', test_mean_squared_error)\n",
"\n",
"# Same plot for both splits; only the data, title and colour differ\n",
"plot_actual_vs_predicted(\n",
"    train_ys, train_ys_prediction,\n",
"    'Actual vs Predicted Data Points: Training Set', '#458588')\n",
"plot_actual_vs_predicted(\n",
"    test_ys, test_ys_prediction,\n",
"    'Actual vs Predicted Data Points: Testing Set', '#CC241D')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading

0 comments on commit 0798fba

Please sign in to comment.