{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt  # for plotting\n",
    "import seaborn as sns  # for plotting\n",
    "from sklearn import datasets\n",
    "from sklearn import preprocessing\n",
    "from sklearn import linear_model\n",
    "from sklearn import model_selection as ms\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.metrics import accuracy_score\n",
    "import timeit\n",
    "import scipy.sparse as sparse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set up some notebook display defaults\n",
    "InteractiveShell.ast_node_interactivity = \"all\"\n",
    "%matplotlib inline\n",
    "plt.style.use('default')\n",
    "sns.set()\n",
    "pd.options.display.float_format = '{:,.2f}'.format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "csv_path = './data/mnist_digits/mnist_784.csv'\n",
    "df = pd.read_csv(csv_path)\n",
    "data = df.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(data.shape)\n",
    "print(data[:4, 780:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# now to make training and test sets\n",
    "# as I don't plan to train a model, keep life easy: just take the first 60,000 rows as the training set\n",
    "N_TRAIN = 60000  # rows [0, N_TRAIN) are the training set, the remainder is the test set\n",
    "# the last column holds the label, every other column is a feature\n",
    "X_trn = data[:N_TRAIN, :-1]\n",
    "y_trn = data[:N_TRAIN, -1]\n",
    "print(X_trn.shape, y_trn.shape)\n",
    "X_tst = data[N_TRAIN:, :-1]\n",
    "y_tst = data[N_TRAIN:, -1]\n",
    "print(X_tst.shape, y_tst.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# probability function\n",
    "# (loop-based on purpose: this is the slow baseline that the timings below are compared against)\n",
    "def compute_probabilities(X, theta, temp_parameter):\n",
    "    \"\"\"\n",
    "    Computes, for each datapoint X[i], the probability that X[i] is labeled as j\n",
    "    for j = 0, 1, ..., k-1\n",
    "\n",
    "    Args:\n",
    "        X - (n, d) NumPy array (n datapoints each with d features)\n",
    "        theta - (k, d) NumPy array, where row j represents the parameters of our model for label j\n",
    "        temp_parameter - the temperature parameter of softmax function (scalar)\n",
    "    Returns:\n",
    "        H - (k, n) NumPy array, where each entry H[j][i] is the probability that X[i] is labeled as j\n",
    "    \"\"\"\n",
    "    n_lbls = theta.shape[0]\n",
    "    # placeholder first column so column_stack has something to append to; it is dropped on return\n",
    "    probs = np.zeros(n_lbls)\n",
    "    bases = np.array([np.e for _ in range(n_lbls)])\n",
    "\n",
    "    for row in X:\n",
    "        tmp_p = np.zeros(n_lbls)\n",
    "        poss_c = [theta[i].dot(row) / temp_parameter for i in range(n_lbls)]\n",
    "        # subtracting the largest exponent keeps every exponent <= 0, so e**x cannot overflow\n",
    "        c_use = max(poss_c)\n",
    "\n",
    "        for j in range(n_lbls):\n",
    "            tmp_p[j] = (theta[j].dot(row) / temp_parameter) - c_use\n",
    "        tmp_p = np.power(bases, tmp_p)\n",
    "        avg_div = tmp_p.sum()\n",
    "        tmp_p = tmp_p / avg_div\n",
    "\n",
    "        # NOTE: column_stack builds a new array, so everything accumulated so far is re-copied\n",
    "        # on every iteration (quadratic in the number of datapoints)\n",
    "        probs = np.column_stack((probs, tmp_p))\n",
    "    return probs[:, 1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's make ourselves a test theta array and a couple of result arrays\n",
    "theta = np.zeros((10, 784))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%timeit -n 1 -r 1\n",
    "# let's time it for one iteration\n",
    "p_60 = compute_probabilities(X_trn, theta, temp_parameter=1)\n",
    "# result: 1min 42s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "That's a fair bit of time. I think I will do my timing using the test dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%timeit\n",
    "p_10 = compute_probabilities(X_tst, theta, temp_parameter=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Variables assigned inside a `%%timeit` cell are not kept, so the IPython magic command didn't give me access to the values `p_60` or `p_10`. After some digging I found the following approach: time the call with the `timeit` module and have the timed statement stash its result in a list. You will see why I want those values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_1 = []\n",
    "t_stmt = 'result_1.append(compute_probabilities(X_trn, theta, temp_parameter=1))'\n",
    "print(timeit.timeit(stmt=t_stmt, setup='from __main__ import result_1, compute_probabilities, X_trn, theta', number=1))\n",
    "p_60 = result_1[0]\n",
    "# show the shape and a small slice rather than dumping the whole array\n",
    "print(p_60.shape)\n",
    "print(p_60[:, :5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_2 = []\n",
    "n_calls = 5\n",
    "t_stmt = 'result_2.append(compute_probabilities(X_tst, theta, temp_parameter=1))'\n",
    "t_total = timeit.timeit(stmt=t_stmt, setup='from __main__ import result_2, compute_probabilities, X_tst, theta', number=n_calls)\n",
    "# timeit.timeit returns the total time for all n_calls executions; report seconds per call\n",
    "print(t_total / n_calls)\n",
    "p_10 = result_2[0]\n",
    "print(p_10.shape)\n",
    "print(p_10[:, :5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# a different approach to timing the execution speed\n",
    "result_3 = []\n",
    "t_stmt = 'result_3.append(compute_probabilities(X_tst, theta, temp_parameter=1))'\n",
    "print(timeit.repeat(stmt=t_stmt, setup='from __main__ import result_3, compute_probabilities, X_tst, theta', repeat=7, number=1))\n",
    "p_10_2 = result_3[0]\n",
    "# one True/False verdict instead of an elementwise (k, n) boolean array\n",
    "print(np.array_equal(p_10, p_10_2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# modified so as to not call probability function, added new parameter 'probs'\n",
    "# (loop-based on purpose: this is the baseline for compute_cost_function_2)\n",
    "def compute_cost_function(X, Y, theta, probs, lambda_factor, temp_parameter):\n",
    "    \"\"\"\n",
    "    Computes the total cost over every datapoint.\n",
    "\n",
    "    Args:\n",
    "        X - (n, d) NumPy array (n datapoints each with d features)\n",
    "        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each\n",
    "            data point\n",
    "        theta - (k, d) NumPy array, where row j represents the parameters of our\n",
    "                model for label j\n",
    "        probs - (k, n) NumPy array, where probs[j][i] is the probability that X[i]\n",
    "                is labeled as j (the output of compute_probabilities)\n",
    "        lambda_factor - the regularization constant (scalar)\n",
    "        temp_parameter - the temperature parameter of softmax function (scalar);\n",
    "                         not used here because probs is passed in\n",
    "\n",
    "    Returns\n",
    "        c - the cost value (scalar)\n",
    "    \"\"\"\n",
    "    n_smp = X.shape[0]\n",
    "    r_sum = (lambda_factor / 2) * np.sum(theta * theta)\n",
    "\n",
    "    # accumulate the log-probability assigned to the true label of every datapoint\n",
    "    l_sum = 0\n",
    "    for i in range(n_smp):\n",
    "        l_sum += np.log(probs[Y[i], i])\n",
    "    l_sum = -(l_sum / n_smp)\n",
    "\n",
    "    return l_sum + r_sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cost_1 = []\n",
    "t_stmt = 'cost_1.append(compute_cost_function(X_trn, y_trn, theta, p_60, lambda_factor=1.0e-4, temp_parameter=1))'\n",
    "print(timeit.timeit(stmt=t_stmt, setup='from __main__ import cost_1, compute_cost_function, X_trn, y_trn, theta, p_60', number=1))\n",
    "c_60 = cost_1[0]\n",
    "print(c_60)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cost_2 = []\n",
    "n_calls = 3\n",
    "t_stmt = 'cost_2.append(compute_cost_function(X_trn, y_trn, theta, p_60, lambda_factor=1.0e-4, temp_parameter=1))'\n",
    "t_out_cost = timeit.repeat(stmt=t_stmt, setup='from __main__ import cost_2, compute_cost_function, X_trn, y_trn, theta, p_60', repeat=7, number=n_calls)\n",
    "# each entry of t_out_cost is the total for n_calls executions; report seconds per call\n",
    "print([tm / n_calls for tm in t_out_cost])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_onehot(v_col, n_smp, n_cat):\n",
    "    \"\"\"\n",
    "    Builds a one-hot encoding of the labels with one column per datapoint.\n",
    "\n",
    "    Args:\n",
    "        v_col - (n_smp, ) array of integer labels, each in 0 .. n_cat-1\n",
    "        n_smp - the number of datapoints (scalar)\n",
    "        n_cat - the number of categories (scalar)\n",
    "    Returns:\n",
    "        (n_cat, n_smp) NumPy array holding 1.0 at [v_col[i], i] and 0.0 everywhere else\n",
    "    \"\"\"\n",
    "    ones = np.ones(n_smp)\n",
    "    cols = np.arange(n_smp)\n",
    "    return sparse.coo_matrix((ones, (v_col, cols)), shape=(n_cat, n_smp)).toarray()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# modified so as to not call probability function, added new parameter 'probs'\n",
    "# also in course code, didn't have function for creating onehot encoding of target data\n",
    "def run_gradient_descent_iteration(X, Y, theta, probs, alpha, lambda_factor, temp_parameter):\n",
    "    \"\"\"\n",
    "    Runs one step of batch gradient descent\n",
    "\n",
    "    Args:\n",
    "        X - (n, d) NumPy array (n datapoints each with d features)\n",
    "        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each\n",
    "            data point\n",
    "        theta - (k, d) NumPy array, where row j represents the parameters of our\n",
    "                model for label j\n",
    "        probs - (k, n) NumPy array, where probs[j][i] is the probability that X[i]\n",
    "                is labeled as j (the output of compute_probabilities)\n",
    "        alpha - the learning rate (scalar)\n",
    "        lambda_factor - the regularization constant (scalar)\n",
    "        temp_parameter - the temperature parameter of softmax function (scalar)\n",
    "\n",
    "    Returns:\n",
    "        theta - (k, d) NumPy array that is the final value of parameters theta\n",
    "    \"\"\"\n",
    "    n_smp = X.shape[0]\n",
    "    n_cat = theta.shape[0]\n",
    "\n",
    "    y_ohe = make_onehot(Y, n_smp, n_cat)\n",
    "\n",
    "    # (k, n) . (n, d) -> (k, d)\n",
    "    tmp_dot = np.dot(y_ohe - probs, X)\n",
    "\n",
    "    grad = (-1 / (n_smp * temp_parameter)) * tmp_dot + lambda_factor * theta\n",
    "    # a new array is returned; the theta passed in is left untouched\n",
    "    return theta - (alpha * grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gdi_1 = []\n",
    "n_calls = 3\n",
    "t_stmt = 'gdi_1.append(run_gradient_descent_iteration(X_trn, y_trn, theta, p_60, alpha=0.3, lambda_factor=1.0e-4, temp_parameter=1))'\n",
    "t_out = timeit.repeat(stmt=t_stmt, setup='from __main__ import gdi_1, run_gradient_descent_iteration, X_trn, y_trn, theta, p_60', repeat=7, number=n_calls)\n",
    "print([tm / n_calls for tm in t_out])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# eliminate loops as much as possible\n",
    "def compute_probabilities_2(X, theta, temp_parameter):\n",
    "    \"\"\"\n",
    "    Computes, for each datapoint X[i], the probability that X[i] is labeled as j\n",
    "    for j = 0, 1, ..., k-1\n",
    "\n",
    "    Args:\n",
    "        X - (n, d) NumPy array (n datapoints each with d features)\n",
    "        theta - (k, d) NumPy array, where row j represents the parameters of our model for label j\n",
    "        temp_parameter - the temperature parameter of softmax function (scalar)\n",
    "    Returns:\n",
    "        H - (k, n) NumPy array, where each entry H[j][i] is the probability that X[i] is labeled as j\n",
    "    \"\"\"\n",
    "    # (k, d) . (d, n) -> (k, n): one column of scores per datapoint\n",
    "    scores = np.matmul(theta, X.T) / temp_parameter\n",
    "    # subtracting each column's maximum keeps every exponent <= 0, so exp cannot overflow\n",
    "    scores -= np.amax(scores, axis=0)\n",
    "    # exponentiate once and normalise each column so that it sums to 1\n",
    "    probs = np.exp(scores)\n",
    "    probs /= np.sum(probs, axis=0)\n",
    "    return probs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prob_rf_1 = []\n",
    "n_calls = 3\n",
    "# overwrite the list contents instead of appending: only the latest (k, n) result is needed,\n",
    "# and appending would keep every one of the repeat * n_calls arrays alive\n",
    "t_stmt = 'prob_rf_1[:] = [compute_probabilities_2(X_trn, theta, temp_parameter=1)]'\n",
    "t_out_prf_1 = timeit.repeat(stmt=t_stmt, setup='from __main__ import prob_rf_1, compute_probabilities_2, X_trn, theta', repeat=7, number=n_calls)\n",
    "p_60_rf = prob_rf_1[0]\n",
    "print([tm / n_calls for tm in t_out_prf_1])\n",
    "print(p_60_rf.shape)\n",
    "print(p_60_rf[:, :5])\n",
    "# the vectorized version must agree with the loop-based result computed earlier\n",
    "np.testing.assert_allclose(p_60_rf, p_60)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# once again, modified to accept probabilities, to limit timing to this function's actual code\n",
    "def compute_cost_function_2(X, Y, theta, probs, lambda_factor, temp_parameter):\n",
    "    \"\"\"\n",
    "    Computes the total cost over every datapoint.\n",
    "\n",
    "    Args:\n",
    "        X - (n, d) NumPy array (n datapoints each with d features)\n",
    "        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each\n",
    "            data point\n",
    "        theta - (k, d) NumPy array, where row j represents the parameters of our\n",
    "                model for label j\n",
    "        probs - (k, n) NumPy array, where probs[j][i] is the probability that X[i]\n",
    "                is labeled as j (the output of compute_probabilities)\n",
    "        lambda_factor - the regularization constant (scalar)\n",
    "        temp_parameter - the temperature parameter of softmax function (scalar);\n",
    "                         not used here because probs is passed in\n",
    "\n",
    "    Returns\n",
    "        c - the cost value (scalar)\n",
    "    \"\"\"\n",
    "    n_smp = X.shape[0]\n",
    "    r_sum = (lambda_factor / 2) * np.sum(theta * theta)\n",
    "    # integer-array indexing picks probs[Y[i], i] for every datapoint i in one step\n",
    "    true_label_probs = probs[Y, np.arange(n_smp)]\n",
    "    l_sum = -np.sum(np.log(true_label_probs)) / n_smp\n",
    "    return l_sum + r_sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prob_cf_1 = []\n",
    "n_calls = 3\n",
    "t_stmt = 'prob_cf_1.append(compute_cost_function_2(X_trn, y_trn, theta, p_60, lambda_factor=1.0e-4, temp_parameter=1))'\n",
    "t_out_crf_1 = timeit.repeat(stmt=t_stmt, setup='from __main__ import prob_cf_1, compute_cost_function_2, X_trn, y_trn, theta, p_60', repeat=7, number=n_calls)\n",
    "c_60_rf = prob_cf_1[0]\n",
    "print([tm / n_calls for tm in t_out_crf_1])\n",
    "print(c_60_rf)\n",
    "# the vectorized cost must agree with the loop-based cost computed earlier\n",
    "assert np.isclose(c_60_rf, c_60)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "a27d3f2bf68df5402465348834a2195030d3fc5bfc8e594e2a17c8c7e2447c85"
  },
  "kernelspec": {
   "display_name": "Python 3.9.2 64-bit ('ds-3.9': conda)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}