{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import demjson\n",
    "import math\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy\n",
    "import json\n",
    "import random\n",
    "import scipy\n",
    "import sklearn\n",
    "import string\n",
    "import tensorflow as tf\n",
    "from collections import defaultdict # Dictionaries that take a default for missing entries\n",
    "from sklearn import linear_model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Data is available at http://cseweb.ucsd.edu/~jmcauley/pml/data/. Download and save to your own directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataDir = \"/home/jmcauley/pml_data/\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sigmoid function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sigmoid(x):\n",
    "    return 1.0 / (1.0 + math.exp(-x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEZCAYAAAB4hzlwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VOX59/HPFUgQEXCjqICggCzWh1oUl1qMdQMsIlURQVRcqq1IBUGsgASFVqviUqoVfiqPbRGX4IKK4tKgiGAQBOEXCIKAiSCCkUBYAsn1/JFhniEmECCZM8v3/XrNK+ecuWfmO5DMNfd9n8XcHREREYCUoAOIiEjsUFEQEZEwFQUREQlTURARkTAVBRERCVNREBGRMBUFiStm1sfM3om11zWz/5rZDXu5/zkz+8HM5tRMwkpf920z6xfN15T4ZjpOQWKNmZ0DPAicDOwCcoA73P3zQIPthZn9F/iXuz9bwX3nAJOBk9x9ew1mGAW0dPdra+o1JPHVDjqASCQzqw9MA24BXgbSgF8DO4LMdZBaAKtqsiCIVBcNH0msOQlwd3/Jy+xw9/fdfTGAmV1nZh/vbmxmF5nZUjMrMLN/mFnW7mGcUNtZZjYudP9XZnZWaPsaM1tnZtdGPFcDM3vezNab2ddmNjzivvKve6GZ5YSe9++AVfRmQlkmAmeZWaGZjSr/XKF2pWZ2Ymj5OTMbb2Zvhh7zqZmdENH2ZDObYWYbzWytmd1tZhcD9wBXmdlmM1sQahse1rIyI8xsVei9TzKzBqH7mocyXGtmq0P/Bvcc2H+hxDMVBYk1uUBJ6AOri5kdXkEbBzCzoynrTQwDjgKWAWeVa9sJ+AI4EngBmAKcBrQE+gHjzezQUNvxQH3KvtmnA9eaWf9KXjeTsg/ho4EVwK8qejOh4aRbgU/dvYG7j458rvLPHeEqYBRweOj5x4Ze+zDgPeBt4FigFfCBu78L/AV40d3ru/upFcTpD1wLnAucGHqv48u1+RXQGrgAuNfM2lT0viRxqShITHH3zcA5QCkwAVhvZq+bWaMKmncFFrv76+5e6u5PAN+Va/O1uz/vZZNnLwJNgdHuvtPd3wOKgVZmlkLZB/Hd7r7V3VcDj1BWOCp73VfdvcTdHwPWHeRbL9/TeNXdP3f3UuA/wC9C27sDa939MXcvdvcid8+u4mv0Aca5+2p33wr8Gegdeu9QVpgyQs+7CFgIdDiodyVxR0VBYo67L3P3G9z9eODnwHHAYxU0PQ74pty2vHLrkUViW+j5N5Tbdhhl3/hrA2si7lsNNKni65ZfP1iRRWYrZRmhrKitOMDnPI6y97Tbasrec+OIbZH/XpGvK0lCRUFimrvnApMoKw7lrQWaldvW9ABfagOwE2gesa05kF/J6x5fblv5HHtTBOwessLMjtmPx35D2dBXRfa1K+G3/PT97eSnvStJYioKElPMrI2ZDTazJqH1ZsDVwKcVNH8L+LmZXWpmtcxsAHt+663wJSraGBqmeQkYa2aHmVlzYBDwr0pet72ZXRZ63T9V4XUjLQRONrP/Y2Z1KJs7qOq+4W8Cx5jZQDNLC2XtFLrvO6CFmVX4HimbUxlkZi1CcxNjgSmh9w6V/NtIclFRkFizGTgDmGtmm4HZwCJgSPmG7r4RuBJ4iLJv+m2Beex999W9TfAOpGzIZCXwEfBvd39uL6/7YOh1WwKfVOG97X78cuA+4APKJtY/3vsj9njsFuBC4FLKhphyKZsUh7JJdwM2mtm83Q+JePizlBW5jygbgtpK2XumgrYVrUsS0MFrkjBC35DzgD7uPjPoPCLxSD0FiWuh4xQahoZhdh9XENVTSYgkEhUFiXdnUTYUsh64BOjh7vF89LNIoDR8JCIiYeopiIhIWFyfEM/M1M0RETkA7l7hLshxXRQA4nn4KyMjg4yMjKBjSA1JT08nKysr6BhSA+L9b7fyQ1k0fCQiIhFUFEREJExFIUDp6elBR5Aa1KJFi6AjSA1J5L9dFYUAJfIvlqgoJLJE/ttVURARkTAVBRERCVNREBGRMBUFEREJU1EQEZEwFQUREQlTURARkTAVBRERCQukKJjZM2b2nZkt2kubJ8xsuZl9YWa/iGY+EZFkFVRP4Tng4sruNLOuQEt3bw3cAvwzWsFERJJZIEXB3WcBBXtp0gN4PtR2LtDQzBpHI5uISDKL1espNAG+iVjPD237Lpg4IiKxyd0pKSmhpKSE0tLSPX5WtK20tHSvzxerRaHKIi90kZ6entAnqhKR4Lk7W7ZsobCwkMLCQoqKiti6dSvbtm37yc+KthUXF1NcXMzOnTvZuXPnHsvl1ytbjvyQB0hJSaFWrVrUqlUrvBy5bdeuXezcuRMzIyVl7wNEsVoU8oFmEetNQ9t+Ip6vfiQiwSkpKWHDhg18//33fP/99+Hl3T83btxIYWEhmzZtYtOmTeHlzZs3U6dOHRo2bEiDBg2oV68ehx56KHXr1q3055FHHkmTJk2oW7cuaWlppKWlkZqaSmpq6n4vp6am7lEA9vUhX5G9XXktyKJgoVtF3gBuA140szOBH91dQ0ciUiXuzvr161m+fDlr1qwhPz+fvLw88vLywsvr16/n8MMPp1GjRhx99NE0atQovNy6dWvOOOMMGjZsGL41aNAg/LN27Vj9Pn3wAnlnZjYZSAeOMrM1wCggDXB3n+Dub5tZNzP7CigC+geRU0RiW1FREUuWLGHZsmUsX76c5cuXk5uby1dffUXt2rVp3bo1LVq0oGnTprRo0YJzzjmHpk2b0qRJE4499lhSU1ODfgsxJ5Ci4O59qtBmQDSyiEh82LhxI3PnzmX+/PksXLiQhQsXkpeXR9u2bWnbti2tW7fmkksu4Y477qB169YceeSRQUeOS4nbBxKRuOXuLF26lI8++ohPP/2UTz/9lHXr1nH66afTsWNHevbsSUZGBm3atEnooZwg6F9TRGLC2rVref/998O31NRUzjvvPM4++2zuvPNO2rdvT61atYKOmfBUFEQkMLm5uWRmZjJ16lRWrFjBeeedx4UXXsjIkSNp2bLlXveSkZqhoiAiUbVixQqef/55MjMzKSgooGfPnjz44IN07txZQ0ExQP8DIlLjtm7dSmZmJs8++yxLliyhb9++TJw4kTPOOOOA9rOXmqOiICI15quvvuLRRx/lhRde4Mwzz2TAgAF0796dtLS0oKNJJVQURKTazZ07l4ceeoiZM2dy6623smjRIpo2bRp0LKkCFQURqTbvvfceY8aMYfXq1QwePJhJkyZx2GGHBR1L9oOKgogctAULFnDXXXexZs0aRo0aRa9evTRpHKc0wyMiB2zVqlVcc801dOvWjd/97ncsXryYPn36qCDEMRUFEdlvO3bsYOTIkXTs2JFWrVqRm5vLH/7wB51LKAGonIvIfvnkk0+46aabaNOmDYsWLaJJkyZBR5JqpKIgIlWyefNm7rnnHjIzM3niiSe4/PLLdcRxAtLwkYjs08cff8zPf/5ztmzZwuLFi7niiitUEBKUegoiUqnS0lIeeughHn30UZ555hkuueSSoCNJDVNREJEKbdy4keuuu44ffviB7OxsmjVrtu8HSdzT8JGI/MScOXP45S9/Sdu2bZk5c6YKQhJRT0FE9vD8888zZMgQJk6cSI8ePYKOI1GmoiAiQNn8wciRI5kyZQpZWVm0b98+6EgSABUFEaG4uJgbbriBlStXMmfOHBo1ahR0JAmIioJIkisqKuKKK64gNTWVDz74gLp16wYdSQKkiWaRJLZp0yYuuugijjnmGKZOnaqCICoKIsmqoKCAiy66iFNPPZVnn31WJ7ETQEVBJCkVFBRwwQUXcPbZZ/P3v/9dRydLmIqCSJLZvHkz3bp1o3PnzowbN04FQfagoiCSRLZt20aPHj045ZRTVBCkQioKIkli165d9O7dm2OOOYannnpKBUEqpJklkSTg7gwYMIBt27bx8ssvU6tWraAjSYwKpKdgZl3MbKmZ5ZrZsArub2ZmH5rZfDP7wsy6BpFTJFH85S9/Ye7cuWRmZpKWlhZ0HIlhUe8pmFkKMB44H/gWyDaz1919aUSzEcCL7v60mbUD3gZOiHZWkUTw4osvMmHCBObMmUP9+vWDjiMxLoieQidgubuvdvedwBSg/Fm3SoEGoeXDgfwo5hNJGJ999hkDBgzgjTfe4Nhjjw06jsSBIOYUmgDfRKznUVYoIo0GZpjZQOBQ4IIoZRNJGPn5+fTs2ZOJEyfSoUOHoONInIjVieargefc/VEzOxP4N3ByRQ0zMjLCy+np6aSnp0cjn0hM27lzJ7169eIPf/gDl112WdBxJGBZWVlkZWVVqa25e82mKf+CZR/yGe7eJbR+N+Du/mBEm8XAxe6eH1pfAZzh7hvKPZdHO79IVWVkZOzxpSWa7rzzTpYuXcq0adNISdGe57InM8PdK9wnOYieQjbQysyaA2uB3pT1DCKtpmzI6P+GJprrlC8IIlKxqVOnkpmZyeeff66CIPst6kXB3UvMbAAwg7KJ7mfcPcfMRgPZ7v4mMASYaGaDKJt0vi7aOUXi0VdffcUtt9zCW2+9xVFHHRV0HIlDgcwpuPs7QJty20ZFLOcA50Q7l0g827ZtG1deeSWjRo2iU6fy+26IVI36liIJYuDAgbRp04bbbrst6CgSx2J17yMR2Q+vvPIKWVlZzJ8/X+c0koOioiAS59atW8eAAQN47bXXdMSyHDQNH4nEMXfn5ptv5qabbuLMM88MOo4kAPUUROLYc889R15eHpmZmUFHkQShoiASp1atWsWwYcP48MMPdeZTqTYaPhKJQ6WlpVx//fXcddddnHLKKUHHkQSioiAShx5//HFKSkoYPHhw0FEkwWj4SCTOrFy5krFjxzJ37lxdQU2qnXoKInHE3fnjH//IXXfdRcuWLYOOIwlIRUEkjrz44ousXbuWQYMGBR1FEpSGj0TiREFBAYMHD2bq1KmkpqYGHUcSlHoKInHi7rvvpmfPnjpITWqUegoiceCTTz7hrbfeYsmSJUFHkQSnnoJIjCsuLuaWW27hscceo2HDhkHHkQSnoiAS4x555BFatGjB5ZdfHnQUSQIaPhKJYXl5eTzyyCNkZ2frlNgSFeopiMSwu+++m1tvvZUTTjgh6CiSJNRTEIlRs2fPZubMmTz99NNBR5Ekop6CSAwqLS1l4MCBPPDAA9SrVy/oOJJEVBREYtCkSZOoU6cOffr0CTqKJBkNH4nEmE2bNjF8+HCmTZumyWWJOvUURGLMmDFj6NatG6eddlrQUSQJqacgEkNyc3OZNGkSixcvDjqKJCn1FERiyLBhwxg6dCiNGzcOOookKfUURGLErFmzmD9/Pi+88ELQUSSJqacgEgPcnaFDhzJ27FgOOeSQoONIElNREIkBU6dOZfv27doFVQIXSFEwsy5mttTMcs1sWCVtepnZEjP70sz+He2MItFSXFzM3XffzUMPPURKir6nSbCiPqdgZinAeOB84Fsg28xed/elEW1aAcOAs9y90MyOjnZOkWiZMGECLVu25IILLgg6ikggE82dgOXuvhrAzKYAPYClEW1uBv7h7oUA7r4h6ilFoqCwsJAxY8bw7rvvBh1FBAhm+KgJ8E3Eel5oW6STgDZmNsvMZpvZxVFLJxJFf/vb3+jSpQsdOnQIOooIELu7pNYGWgGdgeOBj8zs57t7DiKJ4Ntvv+Wpp57iiy++CDqKSFgQRSGfsg/63ZqGtkXKA+a4eymwysxygdbA5+WfLCMjI7ycnp5Oenp6NccVqRljxozhhhtuoFmzZkFHkQSXlZVFVlZWldqau9dsmvIvaFYLWEbZRPNa4DPganfPiWhzcWjb9aFJ5s+BX7h7Qbnn8mjnF6mqjIyMPb60RFq5ciWnn346y5Yt4+ijtR+FRJeZ4e4Vnm0x6nMK7l4CDABmAEuAKe6eY2ajzey3oTbvAhvNbAnwATCkfEEQiWejR49mwIABKggScwKZU3D3d4A25baNKrd+J3BnNHOJRENOTg7Tp09n+fLlQUcR+QkdKSMSZffeey933nknDRs2DDqKyE/E6t5HIglpwYIFfPLJJ0yaNCnoKCIVUk9BJIpGjBjBn//8Z113WWKWegoiUTJ79mwWL17M1KlTg44iUin1FESiwN0ZPnw49957L3Xq1Ak6jkilVBREouCDDz4gPz+f6667LugoInuloiBSw3b3Eu677z5q19aIrcS2Kv2Gmllt4ErgrNCmekAJsBVYBEx29+01klAkzk2bNo3t27fTq1evoKOI7NM+i4KZnQ78GnjP3X9y8Vgzawn83swWuvvMGsgoErdKS0sZOXIk999/vy6gI3GhKj2F7e4+rrI73X0F8ISZnWhmae5eXH3xROLbSy+9xCGHHEL37t2DjiJSJfssCu7+5e5lMzsBWFvRUJG7r6zmbCJxrbS0lFGjRvGPf/wDswrPPSYSc/a3PzsEOBPAzH5tZudUfySRxLBw4UKOO+44zj///KCjiFTZ/u4K8RnQwsxOcPePzeyymgglEu927NhBVlYW06dPVy9B4sr+9hSaAcXAYDP7EDit+iOJxL8JEybws5/9jLPPPjvoKCL7Zb8usmNmfYBX3L3YzI4CfufuE2ss3b7z+KhRo36yvbIrsFV29SG1V/vqbF9UVESrVq047bTT6NixY+B51F7ty7ff20V29rco1AI6uPv80K6qXdz9/io/QTXTldckFj344IPMmzePk08+udIrr4kE6YCvvGZmdUI9AqDsqmnuPj+0nB1ZEMxMF5qVpLdp0yYeeeQR7rvvvqCjiByQvRYFd98BnGVmV5tZ3YramNnhZvZ7oHlNBBSJJ+PGjaNr1660a9cu6CgiB6Qqxym8aWbHAIPM7GfAIaHH7T7NRR7wP+6+qUaTisS4DRs2MH78eLKzs4OOInLAqrRLqruvM7MN7v6Xmg4kEq8efPBBevXqxYknnhh0FJEDtj/HKXQ3szVAGvC5u+fXUCaRuJOfn8+zzz7Ll19+ue/GIjFsf45TaAb8DEgF/mhmfzEdlSMCwJgxY7jxxhs57rjjgo4iclD2p6ewjLJTZO8CMs3sCMpOp/1SjSQTiRMrVqzg5ZdfZtmyZUFHETlo+9NTGAn808zODRWEekCtmoklEj8yMjIYOHAgRx111L4bi8S4KvcU3D3XzAYAfYHLga+AJ2sqmEg8WLx4MTNmzODJJ/WnIIlhv06IFzpl9jM1lEUk7owcOZJhw4ZRv379oKOIVAtdMFbkAH322WfMmzePyZMnBx1FpNro+oAiB2j48OGMHDmSunUrPNhfJC4FUhTMrIuZLTWzXDMbtpd2l5tZqZn9Mpr5RPblww8/5Ouvv6Z///5BRxGpVlEvCmaWAowHLgZOBq42s7YVtDsMGAjMiW5Ckb1zd4YPH87o0aNJTU0NOo5ItQqip9AJWO7uq919JzAF6FFBu/uBB4Ad0Qwnsi9vvvkmW7ZsoXfv3kFHEal2QRSFJsA3Eet5oW1hZnYq0NTdp0czmMi+lJaWMnz4cMaMGUOtWjpMRxJPzO19FDp1xjjgusjNAcUR2cOUKVOoW7cul156adBRRGpEEEUhHzg+Yr1paNtu9Smba8gKFYhjgNfN7NLdF/iJFHllq8ouUydSHXbs2MHw4cOZNGkSOu2XxJPKLtVZkf26HGd1CF3ScxlwPrAW+Ay42t1zKmn/X2Cwuy+o4D5djlOiZty4cWRlZfHGG29UqX1GRoYuxykxaW+X44x6T8HdS0Kny5hB2ZzGM+6eY2ajgWx3f7P8Q9DwkQSsoKCABx54oMrftkTiVSBzCu7+DtCm3LZRlbT9TVRCiezFX//6Vy677DLat28fdBSRGhVzE80isWb16tU888wzuoCOJAWd5kJkH0aMGMFtt92mC+hIUlBPQWQvFixYwPvvv09ubm7QUUSiQj0FkUq4O0OHDuXee+/VqbElaagoiFTi7bffJi8vj5tuuinoKCJRo+EjkQoUFxczaNAgHn/8cZ30TpKKegoiFXjiiSc46aST6Nq1a9BRRKJKPQWRctatW8cDDzzA7Nmzg44iEnXqKYiUc88999C/f39OOumkoKOIRJ16CiIRsrOzmT59OkuXLg06ikgg1FMQCXF3/vSnPzF27FgaNmwYdByRQKgoiIRMnjyZ4uJirr/++qCjiARGw0ciQGFhIcOGDePFF18kJUXflSR56bdfBBg+fDhdunThV7/6VdBRRAKlnoIkvc8++4yXX36Z//3f/w06ikjg1FOQpLZr1y5+//vf8/DDD3PkkUcGHUckcCoKktQef/xxjj76aPr27Rt0FJGYoOEjSVqrVq3ir3/9K3PmzMFMV3wVAfUUJEm5O7fddhuDBg2iVatWQccRiRnqKUhSyszMZNWqVbz66qtBRxGJKSoKknS+//57br/9dl555RXS0tKCjiMSUzR8JEnF3bnlllvo16+fjkkQqYB6CpJU/v3vf7N8+XImT54cdBSRmKSiIEnjm2++YfDgwcyYMYNDDjkk6DgiMUnDR5IUSktL6d+/P3fccQennnpq0HFEYpaKgiSFJ598ki1btjBs2LCgo4jENA0fScJbtmwZGRkZzJ49m9q19SsvsjfqKUhC27ZtG7169eL+++/X5TVFqkBFQRLa7bffTvv27bn11luDjiISFwIpCmbWxcyWmlmumf1kkNfMBpnZEjP7wszeM7NmQeSU+DZp0iRmzZrFhAkTdG4jkSqKelEwsxRgPHAxcDJwtZm1LddsPtDR3X8BZAIPRTelxLsvv/ySoUOHkpmZSf369YOOIxI3gugpdAKWu/tqd98JTAF6RDZw95nuvj20OgdoEuWMEscKCwu54oorGDduHCeffHLQcUTiShBFoQnwTcR6Hnv/0L8RmF6jiSRhuDs33XQT6enp9OvXL+g4InEnpvfPM7NrgI7AuZW1ycjICC+np6eTnp5e47kkdj388MOsWLGCTz75JOgoIjEjKyuLrKysKrUNoijkA8dHrDcNbduDmV0A/BnoHBpmqlBkUZDkNm3aNB577DHmzJmj01iIRCj/hXn06NGVtg1i+CgbaGVmzc0sDegNvBHZwMxOBf4JXOruGwPIKHFm0aJF3Hjjjbz66qs0a6ad1UQOVNSLgruXAAOAGcASYIq755jZaDP7bajZ34B6wMtmtsDMXot2TokfeXl5/Pa3v+WJJ56gU6dOQccRiWuBzCm4+ztAm3LbRkUsXxj1UBKXfvzxR7p27crtt99O7969g44jEvd0RLPEre3bt9OzZ09+85vfMGTIkKDjiCQEFQWJSzt37uTKK6+kcePGjBs3Tkcsi1QTFQWJOyUlJVxzzTWYGf/617+oVatW0JFEEkZMH6cgUt6uXbvo378/P/zwA9OmTSM1NTXoSCIJRUVB4sauXbvo168fGzdu5PXXX9exCCI1QEVB4sKOHTvo27cvRUVFvPHGGyoIIjVEcwoS8zZv3swll1yCu/Paa6+pIIjUIBUFiWnfffcd5513Hq1ateKll16iTp06QUcSSWgqChKzFi9ezBlnnEH37t156qmntJeRSBRoTkFi0ttvv83111/PY489Rp8+fYKOI5I0VBQkppSWljJ27Fj++c9/8tprr3H22WcHHUkkqagoSMzYsGED/fv3p6CggHnz5nHssccGHUkk6WhOQWLC9OnT6dChA23btuXDDz9UQRAJiHoKEqitW7cydOhQ3nzzTf7zn//oynkiAVNPQQKTnZ3NqaeeyqZNm1i4cKEKgkgMUE9Bom7jxo2MGDGCV199lccff5yrrroq6EgiEqKegkRNSUkJTz/9NO3bt6d27drk5OSoIIjEGPUUJCo+/vhj7rjjDg499FBmzJhBhw4dgo4kIhVQUZAaNWvWLDIyMlixYgX3338/ffv21QVxRGKYho+kRsyaNYsLLriAfv36cfXVV5Obmxu+MI6IxC71FKTaFBcXk5mZyfjx4/n2228ZMWIE1157rS6EIxJHVBTkoOXn5/P0008zceJE2rdvz5AhQ+jevTu1a+vXSyTe6K9WDsjmzZt57bXXmDx5MnPmzKFv37588MEHtG/fPuhoInIQVBSkyoqKinjvvfd44YUXeOeddzj33HO57rrreOWVV6hXr17Q8USkGqgoyF6tXLmSt956i7feeovZs2dz+umnc9VVV/Hkk09y1FFHBR1PRKqZioLsYfXq1Xz00Ud89NFHzJw5k8LCQrp168bNN9/MSy+9RIMGDYKOKCI1SEUhiRUWFjJ//nzmzZvHvHnz+PTTT9mxYwedO3emc+fODBgwgFNOOYWUFO25LJIsVBSSQFFREUuXLmXp0qXk5OSQk5PD4sWLyc/Pp0OHDnTs2JFu3boxevRoTjrpJB1LIJLEAikKZtYFeIyyg+eecfcHy92fBjwPdAQ2AFe5+5qoB40TpaWlrF27ljVr1rB69erwzxUrVpCTk8P69etp3bo17dq1o127dvTq1YuMjAzatWun3UZFZA9R/0QwsxRgPHA+8C2QbWavu/vSiGY3Aj+4e2szuwr4G9A72lmDtGvXLjZt2kRBQQHff/8933333U9u69evJy8vj/z8fI444giaN2/O8ccfT/PmzWnTpg1du3alXbt2tGjRQhe9F5EqCeJrYidgubuvBjCzKUAPILIo9ABGhZZfoayIxCR3Z9euXRQXF7N9+3a2bt3K1q1bKSoq2uNnRctbtmzhxx9/pKCggB9//DF8KygoYNu2bTRs2JCGDRvSqFEjGjduHL61bduWzp0707hxY5o0aUKzZs045JBDgv6nEJEEEERRaAJ8E7GeR1mhqLCNu5eY2Y9mdqS7/1D+ycaMGUNJSUmVb6Wlpftss2vXLnbs2EFxcXGFt/L31a5dm7S0NNLS0qhXrx716tXj0EMPrfTn7uVGjRpxxBFHcPjhh4d/7r4ddthhmuAVkaiLlwHlSmc+3333XVJSUjAzWrZsSatWrahVq9ZebykpKXu9PzU1NfwhX6dOnfBy+VudOnVITU3Vh7eIxLSsrCyysrKq1DaIopAPHB+x3jS0LVIe0Az41sxqAQ0q6iVA2Xn6RUSkcunp6Xtc7nb06NGVtg3iK2420MrMmof2MurQL683AAADIklEQVQNvFGuzTTgutDylcCHUcwnIpK0ot5TCM0RDABm8P93Sc0xs9FAtru/CTwD/MvMlgMbSbI9j0REghLInIK7vwO0KbdtVMTyDqBXtHOJiCQ7zZCKiEiYioKIiISpKIiISJiKgoiIhKkoiIhImIqCiIiEqSiIiEiYioKIiISpKIiISJiKgoiIhKkoBKiqp7KV+LRq1aqgI0gNSeS/XRWFACXyL5aoKCSyRP7bVVEQEZEwFQUREQkzdw86wwEzs/gNLyISIHev8DLHcV0URESkemn4SEREwlQUREQkTEUhYGY2yszyzGx+6NYl6ExycMysi5ktNbNcMxsWdB6pXma2yswWmtkCM/ss6DzVTXMKATOzUcBmdx8XdBY5eGaWAuQC5wPfAtlAb3dfGmgwqTZmthLo6O4FQWepCeopxIYK9wKQuNQJWO7uq919JzAF6BFwJqleRgJ/dibsG4szt5nZF2b2P2bWMOgwclCaAN9ErOeFtknicOBdM8s2s5uDDlPdagcdIBmY2XtA48hNlP1iDQeeBO5zdzezMcA44MbopxSRKvqVu681s0bAe2aW4+6zgg5VXVQUosDdL6xi04nAtJrMIjUuHzg+Yr1paJskCHdfG/r5vZm9StmQYcIUBQ0fBczMjolY/R2wOKgsUi2ygVZm1tzM0oDewBsBZ5JqYmaHmtlhoeV6wEUk2N+segrB+5uZ/QIoBVYBtwQbRw6Gu5eY2QBgBmVfup5x95yAY0n1aQy8GjrFTm3gP+4+I+BM1Uq7pIqISJiGj0REJExFQUREwlQUREQkTEVBRETCVBRERCRMRUFERMJUFEREJExFQUREwlQUREQkTKe5EKlGZlYLuAo4kbJTaHcCHnb3rwMNJlJF6imIVK8OwCvASspOkf4ysDbQRCL7QUVBpBq5+3x3LwbOAma6e5a7bw86l0hVqSiIVCMzO93MjgJOdvevzezXQWcS2R+aUxCpXl2AdcBsM7sM2BBwHpH9olNni4hImIaPREQkTEVBRETCVBRERCRMRUFERMJUFEREJExFQUREwlQUREQkTEVBRETC/h9Oo1xIaVUZawAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f8f9c5dddd8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "X = numpy.arange(-7,7.1,0.1)\n",
    "Y = [sigmoid(x) for x in X]\n",
    "plt.plot(X, Y, color='black')\n",
    "plt.plot([0,0],[-2,2], color = 'black', linewidth=0.5)\n",
    "plt.plot([-7,7],[0.5,0.5], color = 'k', linewidth=0.5, linestyle='--')\n",
    "plt.xlim(-7, 7)\n",
    "plt.ylim(-0.1, 1.1)\n",
    "plt.xticks([-5,0,5])\n",
    "plt.xlabel(\"$x$\")\n",
    "plt.ylabel(r\"$\\sigma(x)$\")\n",
    "plt.title(\"Sigmoid function\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Implementing a simple classifier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read a small dataset of beer reviews"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = dataDir + \"beer_50000.json\"\n",
    "f = open(path)\n",
    "\n",
    "data = []\n",
    "\n",
    "for l in f:\n",
    "    if 'user/gender' in l: # Discard users who didn't specify gender\n",
    "        d = eval(l) # demjson is more secure but much slower!\n",
    "        data.append(d)\n",
    "    \n",
    "f.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'beer/ABV': 7.7,\n",
       " 'beer/beerId': '64883',\n",
       " 'beer/brewerId': '1075',\n",
       " 'beer/name': 'Cauldron DIPA',\n",
       " 'beer/style': 'American Double / Imperial IPA',\n",
       " 'review/appearance': 4.0,\n",
       " 'review/aroma': 4.5,\n",
       " 'review/overall': 4.0,\n",
       " 'review/palate': 4.0,\n",
       " 'review/taste': 4.5,\n",
       " 'review/text': \"According to the website, the style for the Caldera Cauldron changes every year. The current release is a DIPA, which frankly is the only cauldron I'm familiar with (it was an IPA/DIPA the last time I ordered a cauldron at the horsebrass several years back). In any event... at the Horse Brass yesterday.\\t\\tThe beer pours an orange copper color with good head retention and lacing. The nose is all hoppy IPA goodness, showcasing a huge aroma of dry citrus, pine and sandlewood. The flavor profile replicates the nose pretty closely in this West Coast all the way DIPA. This DIPA is not for the faint of heart and is a bit much even for a hophead like myslf. The finish is quite dry and hoppy, and there's barely enough sweet malt to balance and hold up the avalanche of hoppy bitterness in this beer. Mouthfeel is actually fairly light, with a long, persistentely bitter finish. Drinkability is good, with the alcohol barely noticeable in this well crafted beer. Still, this beer is so hugely hoppy/bitter, it's really hard for me to imagine ordering more than a single glass. Regardless, this is a very impressive beer from the folks at Caldera.\",\n",
       " 'review/timeStruct': {'hour': 18,\n",
       "  'isdst': 0,\n",
       "  'mday': 30,\n",
       "  'min': 53,\n",
       "  'mon': 12,\n",
       "  'sec': 26,\n",
       "  'wday': 3,\n",
       "  'yday': 364,\n",
       "  'year': 2010},\n",
       " 'review/timeUnix': 1293735206,\n",
       " 'user/ageInSeconds': 3581417047,\n",
       " 'user/birthdayRaw': 'Jun 16, 1901',\n",
       " 'user/birthdayUnix': -2163081600,\n",
       " 'user/gender': 'Male',\n",
       " 'user/profileName': 'johnmichaelsen'}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Predict the user's gender from the length of their review"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = [[1, len(d['review/text'])] for d in data]\n",
    "y = [d['user/gender'] == 'Female' for d in data]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fit the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "                   intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
       "                   multi_class='auto', n_jobs=None, penalty='l2',\n",
       "                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
       "                   warm_start=False)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mod = sklearn.linear_model.LogisticRegression()\n",
    "mod.fit(X,y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Calculate the accuracy of the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9849041807577317"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions = mod.predict(X) # Binary vector of predictions\n",
    "correct = predictions == y # Binary vector indicating which predictions were correct\n",
    "sum(correct) / len(correct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Accuracy seems surprisingly high! Check against the number of positive labels..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9849041807577317"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "1 - (sum(y) / len(y))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Accuracy is identical to the proportion of \"males\" in the data. Confirm that the model is never predicting positive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(predictions)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementing a balanced classifier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use the class_weight='balanced' option to implement the balanced classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "mod = sklearn.linear_model.LogisticRegression(class_weight='balanced')\n",
    "mod.fit(X,y)\n",
    "predictions = mod.predict(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple classification diagnostics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Accuracy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Compute the accuracy of the balanced model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4225849139832378"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correct = predictions == y\n",
    "sum(correct) / len(correct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### True positives, False positives (etc.), and balanced error rate (BER)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "TP = sum([(p and l) for (p,l) in zip(predictions, y)])\n",
    "FP = sum([(p and not l) for (p,l) in zip(predictions, y)])\n",
    "TN = sum([(not p and not l) for (p,l) in zip(predictions, y)])\n",
    "FN = sum([(not p and l) for (p,l) in zip(predictions, y)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TP = 199\n",
      "FP = 11672\n",
      "TN = 8423\n",
      "FN = 109\n"
     ]
    }
   ],
   "source": [
    "print(\"TP = \" + str(TP))\n",
    "print(\"FP = \" + str(FP))\n",
    "print(\"TN = \" + str(TN))\n",
    "print(\"FN = \" + str(FN))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Can rewrite the accuracy in terms of these metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4225849139832378"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(TP + TN) / (TP + FP + TN + FN)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### True positive and true negative rates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "TPR = TP / (TP + FN)\n",
    "TNR = TN / (TN + FP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.6461038961038961, 0.4191589947748196)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "TPR,TNR"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Balanced error rate (BER)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4673685545606422"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "BER = 1 - 1/2 * (TPR + TNR)\n",
    "BER"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Precision, recall, and F1 scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "precision = TP / (TP + FP)\n",
    "recall = TP / (TP + FN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.0167635414034201, 0.6461038961038961)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "precision, recall"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "F1 score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.032679201904918305"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F1 = 2 * (precision*recall) / (precision + recall)\n",
    "F1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Significance testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = dataDir + \"beer_500.json\"\n",
    "f = open(path)\n",
    "\n",
    "data = []\n",
    "\n",
    "for l in f:\n",
    "    d = eval(l)\n",
    "    data.append(d)\n",
    "    \n",
    "f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Randomly sort the data (so that train and test are iid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "random.seed(0)\n",
    "random.shuffle(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Predict overall rating from ABV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "X1 = [[1] for d in data] # Model *without* the feature\n",
    "X2 = [[1, d['beer/ABV']] for d in data] # Model *with* the feature\n",
    "y = [d['review/overall'] for d in data]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fit the two models (with and without the feature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "model1 = sklearn.linear_model.LinearRegression(fit_intercept=False)\n",
    "model1.fit(X1[:250], y[:250]) # Train on first half\n",
    "residuals1 = model1.predict(X1[250:]) - y[250:] # Test on second half"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "model2 = sklearn.linear_model.LinearRegression(fit_intercept=False)\n",
    "model2.fit(X2[:250], y[:250])\n",
    "residuals2 = model2.predict(X2[250:]) - y[250:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Residual sum of squares for both models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "rss1 = sum([r**2 for r in residuals1])\n",
    "rss2 = sum([r**2 for r in residuals2])\n",
    "k1,k2 = 1,2 # Number of parameters of each model\n",
    "n = len(residuals1) # Number of samples"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "F statistic (results may vary for different random splits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F = ((rss1 - rss2) / (k2 - k1)) / (rss2 / (n-k2))\n",
    "scipy.stats.f.cdf(F,k2-k1,n-k2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regression in tensorflow"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Small dataset of fantasy reviews"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = dataDir + \"fantasy_100.json\"\n",
    "f = open(path)\n",
    "\n",
    "data = []\n",
    "\n",
    "for l in f:\n",
    "    d = json.loads(l)\n",
    "    data.append(d)\n",
    "    \n",
    "f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Predict rating from review length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "ratings = [d['rating'] for d in data]\n",
    "lengths = [len(d['review_text']) for d in data]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = numpy.matrix([[1,l] for l in lengths])\n",
    "y = numpy.matrix(ratings).T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First check the coefficients if we fit the model using sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[3.98394783e+00, 1.19363599e-04]])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = sklearn.linear_model.LinearRegression(fit_intercept=False)\n",
    "model.fit(X, y)\n",
    "theta = model.coef_\n",
    "theta"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert features and labels to tensorflow structures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = tf.constant(X, dtype=tf.float32)\n",
    "y = tf.constant(y, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Build tensorflow regression class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "class regressionModel(tf.keras.Model):\n",
    "    def __init__(self, M, lamb):\n",
    "        super(regressionModel, self).__init__()\n",
    "        # Initialize weights to zero\n",
    "        self.theta = tf.Variable(tf.constant([0.0]*M, shape=[M,1], dtype=tf.float32))\n",
    "        self.lamb = lamb\n",
    "\n",
    "    # Prediction (for a matrix of instances)\n",
    "    def predict(self, X):\n",
    "        return tf.matmul(X, self.theta)\n",
    "\n",
    "    # Mean Squared Error\n",
    "    def MSE(self, X, y):\n",
    "        return tf.reduce_mean((tf.matmul(X, self.theta) - y)**2)\n",
    "\n",
    "    # Regularizer\n",
    "    def reg(self):\n",
    "        return self.lamb * tf.reduce_sum(self.theta**2)\n",
    "    \n",
    "    # L1 regularizer\n",
    "    def reg1(self):\n",
    "        return self.lamb * tf.reduce_sum(tf.abs(self.theta))\n",
    "\n",
    "    # Loss\n",
    "    def call(self, X, y):\n",
    "        return self.MSE(X, y) + self.reg()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Initialize the model (lambda = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = tf.keras.optimizers.Adam(0.1)\n",
    "model = regressionModel(len(X[0]), 0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Train for 1000 iterations of gradient descent (could implement more careful stopping criteria)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "for iteration in range(1000):\n",
    "    with tf.GradientTape() as tape:\n",
    "        loss = model(X, y)\n",
    "    gradients = tape.gradient(loss, model.trainable_variables)\n",
    "    optimizer.apply_gradients(zip(gradients, model.trainable_variables))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Confirm that we get a similar result to what we got using sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Variable 'Variable:0' shape=(2, 1) dtype=float32, numpy=\n",
       "array([[3.98340607e+00],\n",
       "       [1.19614124e-04]], dtype=float32)>"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.theta"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make a few predictions using the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Tensor: shape=(10, 1), dtype=float32, numpy=\n",
       "array([[4.232921 ],\n",
       "       [4.165339 ],\n",
       "       [4.1651   ],\n",
       "       [4.197635 ],\n",
       "       [4.194166 ],\n",
       "       [4.0396247],\n",
       "       [4.0818486],\n",
       "       [4.047041 ],\n",
       "       [4.0570884],\n",
       "       [4.0489545]], dtype=float32)>"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.predict(X)[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Classification in Tensorflow"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Predict whether rating is above 4 from length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = numpy.matrix([[1,l*0.0001] for l in lengths]) # Rescale the lengths for conditioning\n",
    "y_class = numpy.matrix([[r > 4 for r in ratings]]).T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert to tensorflow structures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = tf.constant(X, dtype=tf.float32)\n",
    "y_class = tf.constant(y_class, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Tensorflow classification class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "class classificationModel(tf.keras.Model):\n",
    "    def __init__(self, M, lamb):\n",
    "        super(classificationModel, self).__init__()\n",
    "        self.theta = tf.Variable(tf.constant([0.0]*M, shape=[M,1], dtype=tf.float32))\n",
    "        self.lamb = lamb\n",
    "\n",
    "    # Probability (for a matrix of instances)\n",
    "    def predict(self, X):\n",
    "        return tf.math.sigmoid(tf.matmul(X, self.theta))\n",
    "\n",
    "    # Objective\n",
    "    def obj(self, X, y):\n",
    "        pred = self.predict(X)\n",
    "        pos = y*tf.math.log(pred)\n",
    "        neg = (1.0 - y)*tf.math.log(1.0 - pred)\n",
    "        return -tf.reduce_mean(pos + neg)\n",
    "    \n",
    "    # Same objective, using tensorflow short-hand\n",
    "    def obj_short(self, X, y):\n",
    "        pred = self.predict(X)\n",
    "        bce = tf.keras.losses.BinaryCrossentropy()\n",
    "        return tf.reduce_mean(bce(y, pred))\n",
    "\n",
    "    # Regularizer\n",
    "    def reg(self):\n",
    "        return self.lamb * tf.reduce_sum(self.theta**2)\n",
    "    \n",
    "    # Loss\n",
    "    def call(self, X, y):\n",
    "        return self.obj(X, y) + self.reg()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Initialize the model (lambda = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = tf.keras.optimizers.Adam(0.1)\n",
    "model = classificationModel(len(X[0]), 0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run for 1000 iterations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "for iteration in range(1000):\n",
    "    with tf.GradientTape() as tape:\n",
    "        loss = model(X, y_class)\n",
    "    gradients = tape.gradient(loss, model.trainable_variables)\n",
    "    optimizer.apply_gradients(zip(gradients, model.trainable_variables))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Variable 'Variable:0' shape=(2, 1) dtype=float32, numpy=\n",
       "array([[-0.05846212],\n",
       "       [ 0.33439782]], dtype=float32)>"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.theta"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Model predictions (as probabilities via the sigmoid function)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Tensor: shape=(10, 1), dtype=float32, numpy=\n",
       "array([[0.5028233 ],\n",
       "       [0.49809995],\n",
       "       [0.49808323],\n",
       "       [0.50035715],\n",
       "       [0.5001147 ],\n",
       "       [0.4893153 ],\n",
       "       [0.49226528],\n",
       "       [0.48983335],\n",
       "       [0.49053538],\n",
       "       [0.48996705]], dtype=float32)>"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.predict(X)[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regularization pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parseData(fname):\n",
    "    for l in open(fname):\n",
    "        yield eval(l)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Just read the first 5000 reviews (deliberately making a model that will overfit if not carefully regularized)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = list(parseData(dataDir + \"beer_50000.json\"))[:5000]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fit a simple bag-of-words model (see Chapter 8 for more details)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "wordCount = defaultdict(int)\n",
    "punctuation = set(string.punctuation)\n",
    "for d in data: # Strictly, should just use the *training* data to extract word counts\n",
    "    r = ''.join([c for c in d['review/text'].lower() if not c in punctuation])\n",
    "    for w in r.split():\n",
    "        wordCount[w] += 1\n",
    "\n",
    "counts = [(wordCount[w], w) for w in wordCount]\n",
    "counts.sort()\n",
    "counts.reverse()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1000 most popular words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "words = [x[1] for x in counts[:1000]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "wordId = dict(zip(words, range(len(words))))\n",
    "wordSet = set(words)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Bag-of-words features for 1000 most popular words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "def feature(datum):\n",
    "    feat = [0]*len(words)\n",
    "    r = ''.join([c for c in datum['review/text'].lower() if not c in punctuation])\n",
    "    for w in r.split():\n",
    "        if w in words:\n",
    "            feat[wordId[w]] += 1\n",
    "    feat.append(1) # offset\n",
    "    return feat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "random.shuffle(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = [feature(d) for d in data]\n",
    "y = [d['review/overall'] for d in data]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "Ntrain,Nvalid,Ntest = 4000,500,500\n",
    "Xtrain,Xvalid,Xtest = X[:Ntrain],X[Ntrain:Ntrain+Nvalid],X[Ntrain+Nvalid:]\n",
    "ytrain,yvalid,ytest = y[:Ntrain],y[Ntrain:Ntrain+Nvalid],y[Ntrain+Nvalid:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Unregularized model (train on training set, test on test set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = sklearn.linear_model.LinearRegression(fit_intercept=False)\n",
    "model.fit(Xtrain, ytrain)\n",
    "predictions = model.predict(Xtest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5621377319894532"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum((ytest - predictions)**2)/len(ytest) # Mean squared error"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Regularized model (\"ridge regression\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = linear_model.Ridge(1.0, fit_intercept=False) # MSE + 1.0 l2\n",
    "model.fit(Xtrain, ytrain)\n",
    "predictions = model.predict(Xtest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5553767774281313"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum((ytest - predictions)**2)/len(ytest)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Complete regularization pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Track the model which works best on the validation set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "bestModel = None\n",
    "bestVal = None\n",
    "bestLamb = None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Train models for different values of lambda (or C). Keep track of the best model on the validation set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "l = 0.01, validation MSE = 0.4933728029983656\n",
      "l = 0.1, validation MSE = 0.49292702478338096\n",
      "l = 1, validation MSE = 0.48865630138060057\n",
      "l = 10, validation MSE = 0.4585804821929264\n",
      "l = 100, validation MSE = 0.39865226713206803\n",
      "l = 1000, validation MSE = 0.413825760476879\n",
      "l = 10000, validation MSE = 0.5041631912430448\n"
     ]
    }
   ],
   "source": [
    "ls = [0.01, 0.1, 1, 10, 100, 1000, 10000]\n",
    "errorTrain = []\n",
    "errorValid = []\n",
    "\n",
    "for l in ls:\n",
    "    model = sklearn.linear_model.Ridge(l)\n",
    "    model.fit(Xtrain, ytrain)\n",
    "    predictTrain = model.predict(Xtrain)\n",
    "    MSEtrain = sum((ytrain - predictTrain)**2)/len(ytrain)\n",
    "    errorTrain.append(MSEtrain)\n",
    "    predictValid = model.predict(Xvalid)\n",
    "    MSEvalid = sum((yvalid - predictValid)**2)/len(yvalid)\n",
    "    errorValid.append(MSEvalid)\n",
    "    print(\"l = \" + str(l) + \", validation MSE = \" + str(MSEvalid))\n",
    "    if bestVal == None or MSEvalid < bestVal:\n",
    "        bestVal = MSEvalid\n",
    "        bestModel = model\n",
    "        bestLamb = l"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using the best model from the validation set, compute the error on the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4380927014652703"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictTest = bestModel.predict(Xtest)\n",
    "MSEtest = sum((ytest - predictTest)**2)/len(ytest)\n",
    "MSEtest"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plot the train/validation/test error associated with this pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAEfCAYAAABvWZDBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8FFXW+P/PCavsYU9YAgzihrLNuIBAANkERBEQlHUERYdHx+884+ggAsOPcX1GnXEXREAJIG4ooiAQHHEcNKjsIApBSFiEhCVkI31+f3Sn7YQO6XS6053Oeb9eedF1q+rWuUnTp2/dqrqiqhhjjDElFRXqAIwxxpRPlkCMMcb4xRKIMcYYv1gCMcYY4xdLIMYYY/xiCcQYY4xfLIGYck1E4kTEISJRruWPRWSsL9v6cayHReTV0sTrxzGvF5GdAaprn4j0dr0u87aYyGMJxISUiKwSkZleyoeKSKqPH/bum5lU9UZVXeTLtsXE1VNEfi6wo+pjqnqXL/uXhIiMF5FzInJKRNJFZLOIDHId8wtVvSzQxwxWW0zFYgnEhNoCYIyX8jHAIlV1lHE8+QQfk02AfKmqdVS1HvA6sExE6pbh8Y0pMUsgJtTeBxqIyPX5BSJSDxgMLHQt3+j6Vn5SRJJFZEZRlYnIehH5vet1lIg8LSLHRGQvMKjQthNEZIfrm/9eEbnLVV4D+BiIFZHTrvVNRWSGiCzy2P8mEdkmIidEZJ2IXOqxbp+I/ElEvheRNBFJEJGqPv5OXgcuAn5TuCfkqvchEdkuIsdFZJ5nvSIyWES+dR3zCxG5sojfk7stHqf2xrl+v0dF5K8e24rrmHtdv8slrr+RqeAsgZiQUtUs4G1gnEfxbcBOVd3mWj4DjFXVujiTwBQRucmH6u8CbgQ6AL8FhhdafwS4UVXrABOBZ0Sko6qeBQYCKapa29UzOJwfMoCItAMWA/cBjYBVwIciUtmj/hFAP6C1K4YJxQXs2n8ycBr4wfOYHm4H+gK/AS4BHnHt2wmY59q/PvAKsEJEqhRxuML1dgMuBm4AHhWRS1zl9wE3Ad2BWCANeLG4tpjIZwnEhIMFwAiPb9JjXWUAqOrnqrrd9XobsATo6UO9I4BnVTVFVdOBxzxXquoqVd3vev1vYDXOD0lfjAQ+UtV1qpoHPI2z19DVY5vnVPWI69gfAh0vUN91InICSMGZQG9W1dNFbPsvjzbNAUa7yicDL6vqN+q0CMgGrvWhPQrMVNUcVd0CfI8z6QHcDUxT1VRVzQX+Bgz392IEEzkqF7+JMcGlqhtF5Bhws4h8A/wOuCV/vYhcDTwOtAequn7e9qHqWMBzIDzZc6WIDAQeBdrh/DJ1EbDFx7BjPetTVXWdamrmsc0Rj9dngZgL1PcfVe3h47EPerxOdsUCEAeME5H/cS0LUMVjfXEKx1vLo973RCR/PEqAXKAJkOpj3SYC2TcIEy4WAeNxDp5/qqrHPNYtxjlW0sw1yPwKzg+x4qQCLTyW4/JfuHo7y4EngUaqGo3zNFR+vcUNoKd41ufSgoIf7sFSuE0prtc/A3NUtb7rJ1pVa6nq0lIe7wAwsFC9NVXVkkcFZwnEhIuFOM+9T8Lj9JVLLSBNVXNdvZHbC60vKpksA+4TkWYiEg38xWNdfk/mF1V1uHoj/TzWH8E5uF/nAnUPEpFeIlJZRP4XyAL+c+FmBsQfXG2qD/wV5yk9gNdwjg9dDSAiNV0XINT0oc4LJeRXgL+LSEtXvY18HIMyEc4SiAkLqpoMfAnUAFYUWn0vMFtETuIcMC78jVqLeP0a8CnO8/nfAO94HO8MzsHht11jD6OADzzW7wYSgJ9cV1k1LRTvHpy9peeBYzgH94eo6jkvcQTaYpzjNXtxDrTPccWUhHMc5HlXm/bg7NW5w75AnYXXeS4/h/N3s9r1N/gSuLo0DTCRQYI9oZSIDACexZms5qnqE4XWjwee4teu//Oq+rprXR7O//wCJKvqzUEN1pgwJyL7gDtVdV2oYzEmqIPorqs0ngf64DxP+7WIfKCquwptukRV7/NSRYaqdg5mjMYYY/wT7FNYVwM/qGqy6/K/JcBQL9sVdf7Vl4FSYyoSm4PahI1gJ5BmFLyM8iAFL3PMN0xEvhORZSLS3KO8mohsEpEvRcRb4jGmQlHVNnb6yoSLcBhEXwG0UtWOwGcUvAInTlWvBu4AnhWR1qEI0BhjzPmCfSPhIaClx3JzV5mbqqZ5LM7FeV1+/rpU17/7RCQR6ATs89xfRKxLb4wxflDVUg0TBLsH8jXQ1vWwtqo4L5UscIlmocsjhwI7XOX18h9tISINcT4iYoe3g6hqqX5mzJhR6u28rfOlzHPZ22tfYwvH9hXV1gttE87tK+nfrizbV9K2hUv7gvW3C0T7ytN705/2BUJQeyCqmiciU3Fes55/Ge9OEZkFfK2qH+G80esmnI9GOMGvD5y7DHjFdSlvFPCYnn/1VkDEx8eXejtv63wp81wu6nVphap9RbU1kG0rSX2lbV8o/na+1lfStnkrj6T3prfySGpfuHy2lDrDhvrH2YTINWPGjFCHEFTWvvItktsXyW1TVXV9dpbq8zccBtHNBQT622y4sfaVb5HcvkhuW6AE/U70YBMRLe9tMMaYsiYiaJgPohtjjIlQlkCMMcb4xRKIMcYYv1gCMcYY4xdLIMYYY/xiCcQYY4xfLIEYY4zxiyUQY4wxfrEEYowxxi+WQIwxxvjFEogxxhi/WAIxxhjjF0sgxhhj/GIJxBhjjF8sgRhjjPGLJRBjjDF+sQRijDHGL5ZAjDHG+MUSiDHGGL9YAjHGGOMXSyDGGGP8YgnEGGOMXyyBGGOM8YslEGOMMX6xBGKMMcYvlkCMMcb4JegJREQGiMguEdkjIn/xsn68iBwVkc2un98XWrdHRHaLyLhgx2qMMcZ3oqrBq1wkCtgD9AFSgK+BUaq6y2Ob8UAXVb2v0L7RwDdAZ0CAJKCzqp4stJ0Gsw3GGBNpMjMzqVGjBqoqpakn2D2Qq4EfVDVZVXOBJcBQL9t5a0R/YLWqnlTVdGA1MCB4oRpjTORzOBwsX748IHUFO4E0A372WD7oKitsmIh8JyLLRCR/feF9DxWxrzHGGB99+umniJSq4+EWDoPoK4BWqtoR+AxYGOJ4jDEmIn3zzTf8+OOPDB8+PCD1VQ5ILUU7BLT0WG7uKnNT1TSPxbnAEx77xhfad723g8ycOdP9Oj4+nvj4eG+bGWNMhfXee+/x/fffA/D4448HpM5gD6JXAnbjHERPBTYBo1V1p8c2TVX1sOv1LcCfVbVroUH0KNfrLq7xEM9j2CC6McZcwIkTJ3j99dcZNmwYbdq0AUBESj2IHtQeiKrmichUnAPgUcA8Vd0pIrOAr1X1I+A+EbkJyAVOABNc+6aJyGyciUOBWYWThzHGmAvLysoiISGBnj17upNHoAS1B1IWrAdijDHeORwOEhISqFevHoMGDSqwLux7IGWhY8eOfPfdd4iI+8qC/Nf+LIdbHZUqVaJSpUoBu2rCGFNxrFmzhry8PAYMCM4dEOU+gbRq1Yr9+/ejqu4foFTL4VRHXl4eANWrV6datWrun8LLhcu8rY+KCoeL7owxZeHbb79lz549TJo0iUqVKgXlGHYKqxw4d+4c2dnZZGdnk5WV5fW1t2XPspycHCpXrlxskikuUVWpUsV6Q8aEueTkZJYtW8bEiRNp2LCh123sFFYFUblyZSpXrkzNmjX9rkNVycnJKTYJpaWlkZOTU+Q2Doej2KTjSyIK1jciYyq69PR0li9fzrBhw4pMHoFiPRBTInl5ecX2frz1hAqXV6pUierVq9OkSRM6d+7MJZdcYknFmFLKzs7m9ddfp3PnzlxzzTUX3DYQPRBLIKbMqSq5ublkZWWRnJxMUlISx48fp2PHjnTu3Jno6OhQh2hMueNwOFi6dCm1atVi8ODBxZ5qtgSCJZBIcezYMTZv3syWLVuIiYmhS5cutGvXznolxvjos88+4+DBg4wdO9an/zeWQLAEEmnOnTvHjh07SEpK4sSJE3Ts2JEuXbpQr169UIdmTNj6/vvv2bBhA5MmTaJGjRo+7WMJBEsgkezYsWMkJSWxZcsWYmNjrVdijBc///wzS5YsYfz48TRu3Njn/SyBYAmkIsjNzXX3StLS0ujUqROdO3e2Xomp8E6ePMncuXMZMmQI7dq183m/2bNn8+ijj1oCsQRSsRw9epSkpCS2bt1Ks2bN3L0Su0nSVDQ5OTm8/vrrXHXVVXTt2tXn/TZv3syAAQM4duyYJRBLIBWTZ68kPT2dTp060alTJ+uVmApBVVm2bBnVq1fnpptuKtHNvQ888ACdO3dm3LhxlkAsgRjPXknz5s3p0qULF198sfVKTMRat24d+/fvZ9y4cVSuXLL7wfM/L6OioiyBWAIx+XJzc9m+fTtJSUmcPHnSPVZSt27dUIdmTMBs3bqVdevWMWnSpFI9ncIG0bEEYrw7cuSIu1fSokUL65WYiHDo0CEWL17MuHHjaNKkSanqsgSCJRBzYTk5OWzfvp3Nmzdz6tQpd6+kTp06oQ7NmBI5deoUc+fOZdCgQVxyySWlrs8SCJZAjO8OHz5MUlIS27Zto2XLlnTp0oW2bdtar8SEvdzcXObPn8/ll1/O9ddfX6J9v//+e+rUqUPr1q0LlFsCwRKIKbn8XklSUhKnT5+mc+fOdOrUyXolJiypKu+88w6VKlXi5ptvLtEVV5mZmXTs2JE5c+YwfPjwAussgWAJxJSOZ68kLi6OLl268Jvf/MZ6JSZsbNiwgb179zJ+/PgSX3H1pz/9iZSUFBISEs5bZwkESyAmMHJycti2bRtJSUlkZGS4x0pq164d6tBMBbZ9+3bWrFnDpEmTqFWrVon2/eKLLxg5ciRbt26lQYMG5623BIIlEBN4qampJCUlsX37dlq1akXnzp2tV2LKXGpqKm+++SZjxowhJiamRPtmZGTQsWNHnn76aYYOHep1G0sgWAIxwZOTk8PWrVvZvHkzGRkZ7rES65WYYDt9+jRz586lf//+XH755SXef+3atSQkJDB37twit7EEgiUQUzYK90ryx0psfngTaLm5uSxYsICLL76Ynj17Bu04lkCwBGLKVnZ2tnusJDMzk86dO9OxY0frlZiAUFXee+89VJVhw4YF9QuKJRAsgZjQSUlJISkpiR07dtC6dWu6dOlCmzZtrFdi/Pbvf/+bXbt2MWHCBKpUqRLUY1kCwRKICb3s7Gy2bt1KUlISWVlZ7rGSkl41Yyq2nTt3smrVKiZPnlwmPVpLIFgCMeFDVUlNTeWbb75h586dtGnThvj4eBo1ahTq0EyYO3z4MIsWLeL222+nWbNmJd7/1KlTrF+/vsgrrryxBIIlEBOesrOz2bx5M1988QWDBw/msssuC3VIJkydOXOGuXPncsMNN9C+fXu/6pg8eTIiwquvvurzPoFIICW7rdEY45Nq1apx3XXXERcXx7Jly0hNTSU+Pt7uJTEFnDt3jmXLltGhQwe/k8cnn3zCmjVr2LJlS4CjK17Q380iMkBEdonIHhH5ywW2u1VEHCLS2bUcJyJnRWSz6+fFYMdqTKDFxsYyefJkDhw4wJIlS8jKygp1SCZMqCofffQRtWrVIj4+3q860tPTmTx5MnPnzg3Js9yCegpLRKKAPUAfIAX4GhilqrsKbVcLWAlUAaaq6mYRiQM+VNWrijmGncIyYS8vL4/Vq1ezd+9ebrvtNho3bhzqkEyIbdy4kW3btjFx4kSqVq3qVx0TJ07koosu4sUXS/79OhCnsILdA7ka+EFVk1U1F1gCeBvlmQ08DmQXKrfrIU1EqFSpEgMHDqR79+4sWLCAHTt2hDokE0J79uzhv//9L6NGjfI7eZw8eZLU1FSefPLJAEfnu2AnkGbAzx7LB11lbiLSCWiuqqu87N9KRJJEZL2IlOwh+MaEoY4dO3LHHXewevVq1q5di8PhCHVIpowdPXqUDz74gJEjR5ZquuW6devyySefhPRy8ZAOoovzjqt/AOM9i13/pgItVTXNNS7yvohcrqpnCtczc+ZM9+v4+Hi/zycaUxbyx0XefvttEhISGDZsGBdddFGowzJlICMjg4SEBPr370/z5s3L9NiJiYkkJiYGtM5gj4FcC8xU1QGu5YcAVdUnXMt1gL3AGZyJoylwHLhJVTcXqms98Ccv5TYGYsqlvLw81qxZww8//GDjIhVAXl4eixYtokWLFvTp0yfU4ZSLMZCvgbauK6qqAqOAFfkrVfWUqjZW1Taq2hr4ChjiGkRv6BqER0TaAG2Bn4IcrzFlplKlSgwYMIAePXrYuEiEU1VWrlxJ9erV6d27d6jDCZigJhBVzQOmAquB7cASVd0pIrNEZLC3Xfj1FFYPYIuIbAaWAXeranow4zUmFDp06GDjIhHuv//9LykpKaV+QOL8+fM5e/ZsACMrHbsT3ZgwkZGRwfLly6lcubKNi0SQvXv38sEHH3DnnXdSr149v+t5++23mT59Ot9++21A3hv2KBMsgZjI4nA4WL16NXv27GHUqFE2LlLOHTt2jDfeeIPbbruNli1b+l3P0aNHueqqq3j//fe59tprAxJbeRgDMcaUQFRUFAMGDCA+Pp4FCxawffv2UId0npUrV5KeXvBscnp6OitXrgxRROHp7NmzJCQk0Ldv31IlD1VlypQpTJgwIWDJI1AsgRgThq666irGjBnDmjVr+Oyzz8JqXKRbt25MmzbNnUTS09OZNm0a3bp1C3Fk4SMvL4/ly5dz6aWX0rFjx1LVtWTJEnbv3l3gdoVwYQnEmDAVExPD5MmTOXToEIsXLyYzMzPUIQFQr1495syZw7Rp09i/fz/Tpk1jzpw5pTq/H0lUlVWrVlG5cmVuuOGGUtf31VdfsWDBAqpXrx6A6ALLxkCMCXMOh4M1a9awe/dubrvtNpo0aRLqkADYv38/rVu3Zt++fbRq1SrU4YSNTZs28c0333DnnXdSrVq1UIdTJBsDMaYCiIqKon///sTHx7Nw4cKwGBdJT0/nqaeeYt++fTz11FPnjYlUVD/++COff/45o0ePDuvkESjF9kBEpDowGOgOxAKZwDZgpaqG/J1sPRBTkaSmprJ06VKuuOIK+vTpE5L5RfLHPPJPWxVerqiOHz/O/PnzGT58eLnokQX9Ml4RmYUzeSQCScBRoDrQDujlev0nVS37mUx+jdESiKlQzp49y/Lly4mKiuLWW28t8/tFVq5cSbdu3Qoki/T0dDZu3MigQYPKNJZwkZmZybx58+jatSudO3cOdTg+KYsEMkhVi7w2T0Qa43zg4TelCaI0LIGYiihcx0UqIofDwVtvvUWjRo0YMGBAqetbsWIFbdu25fLLLw9AdEUL+hhIMcmjsqoeDWXyMKaiyh8X6dWrFwsXLmTbtm2hDqnC+vTTTxER+vXrV+q6Dhw4wJ133kleXl4AIgu+CyYQEfnC4/WiQqs3BSUiY4zPrrzySsaMGcPatWtZs2ZNWN0vUhF88803/PTTTwwfPrzU41GqyqRJk3jggQe48sorAxRhcBXX4poer68otM5mCzQmDOTfL5Kamspbb70VVg/bi2T79u0jMTGRUaNGBeQejVdffZW0tDQefPDBAERXNopLIBcaXLCBB2PCRI0aNRgzZgxNmjThtdde4/Dhw6EOKaKdOHGCd955h1tvvZUGDRqUur59+/Yxbdo0FixYQOXKIZ3nr0SKi7SeiNyCM9HUE5FhrnIB/J+L0RgTcFFRUfTr14+YmBgWLVrEwIEDad++fajDijhZWVkkJCTQs2dPWrduHZA6d+zYwYwZM4I+cB5oxV2FNf9CO6vqxIBHVEJ2FZYx5zt8+DBLly7lsssu44YbbgjJ/SKRyOFwkJCQQL169cr9Jcv2OHcsgRhTlLNnz/LOO+8AcOutt1KjRo0QR1T+ffrppxw5coQ77riDSpUqhTqcUgn6ZbwiMkRE4jyWHxWR70VkhYgEpu9mjAmKGjVqcMcdd9C0aVMbFwmAb7/9lj179jBixIhynzwCpbh+7RzgGIBrCtoxwO9xzmv+cnBDM8aUVlRUFH379qVPnz4sWrSIrVu3hjqkcik5OZm1a9cyevRomynSQ7FXYalq/jWBw4B5qpqkqnOBRsENzRgTKO3bt2fs2LGsW7eOTz/91O4XKYH09HSWL1/OLbfcQsOGDQNS5549e1iwYEFA6gql4hKIiEgtEYkC+gBrPdaF38PpjTFFatq0KZMnT+bo0aO8+eabdr+ID7Kzs0lISOD666/nN7/5TUDqzMvLY8KECZw6dSog9YVScQnkWeA74BtgZ/5jS0SkE5Aa5NiMMQGWPy4SExPDa6+9Rmqq/TcuisPh4N1336V58+ZcffXVAav3mWeeoVq1avzhD38IWJ2h4svj3JsBjYHvVdXhKosBqqjqgeCHeGF2FZYx/tm2bRurVq1iwIAB5ebRGWUlNzeX9evXk5KSwtixYwM2aL5z50569OjBpk2bAnYPib/K4mm8F3wusapuLs3BA8ESiDH+O3LkCEuXLuWSSy6hb9++FfJ+kXPnznH06FFSUlLcP8ePHyc2NpbbbrstYJc/nzt3jm7dujFx4kSmTJkSkDpLoywSiAPn5FG/5Bd5rFZV7V2agweCJRBjSiczM5N33nmHvLw8hg8fTs2aNYvfqZzKy8srkCxSU1M5duwYDRo0ICYmhtjYWGJjY2nSpEnAHymSmprK448/zrPPPotI6B8lWBYJ5I/AcOAksAR4T1XPlOaAgWYJxJjSczgcrFu3jm3btnHbbbcRExMT6pBKLS8vj2PHjpGamupOGEePHiU6OtqdKGJiYmjatClVqlQJdbhlrszuRBeRNsAoYCiQDPxdVb8rzYEDxRKIMYGzfft2Pv74Y/r3789VV10V6nB85nA4+OWXXwr0LI4cOULdunXdiSI2NpamTZtStWrVUIcbFgKRQHzqo6nqTyLyAXARMBbnlLZhkUCMMYFzxRVX0LBhQ5YuXUpKSgr9+vULu3ERh8PB8ePHC/QsDh8+TO3atd09i8svv5yYmBiqVasW6nAjWnGnsDx7Hj/jPI21UlUzyya84lkPxJjAC5dxEVXlxIkTBXoWqamp1KxZs0DPIiYmJiBzclQkZTWIvgX4ADhFoTlAVPUfPgQ5AOf9JFE472R/oojtbgXeBn6bf3WXiDyM89Ep54D7VXW1l/0sgRgTBJ7jIiNHjiQ2Njaox1NV0tLS3Iki/9/q1asXGLOIjY0tF48TycnJYdasWUyfPj0sk1tZnML6G78mjVolrdx1B/vzOO9iTwG+FpEPVHVXoe1qAfcBX3mUXQaMBC4DmgOficjFli2MKRtRUVHccMMNxMTE8NZbb9GvXz86dOgQkLpVlZMnTxboWaSkpFC1alV3oujWrRuxsbHl9inCc+bMYcuWLRF9Gu2CCURVZ5ay/quBH1Q1GUBEluA8Hbar0HazgccBz7kchwJLVPUcsF9EfnDV999SxmSMKYErrriCRo0asWTJEve4SElurFNVTp06VSBRpKSkUKlSJXfP4tprryUmJoZatUr8PTUsbd68mZdffpnvvvsuLC7ZDZYLJhAReQR4QVXTiljfG6ihqh8VUUUznGMn+Q7iTAKedXQCmqvqKhF5sNC+//FYPuQqM8aUscaNGzN58mTeffddFi1axIgRI4ocFzl9+vR5PQvA3bP43e9+R2xsLLVr1y7LJpSZ7Oxsxo0bxzPPPBMRl0NfSHGnsLYCH4lIFrAZ56PdqwMXAx2Bz4C/+3twcabmfwDj/a3DGFM2LrroIkaPHs369et57bXXGDlyJHXq1DmvZ5GXl+fuWXTu3JlBgwZRp06diP4m7mnWrFm0a9eO0aNHhzqUoCvuFNYHwAcicjHQDYjBOZj+JnCXD1djHQJaeiw3d5Xlqw1cASS6kklTYIWI3OTDvm4zZ850v46Pjyc+Pr6YsIwx/oiKiqJPnz7ExMSwcOFCRMTds+jQoQMDBw6kbt26FSZZFKaqqCovvfRS2P0OEhMTSUxMDGidQZ3SVkQqAbtxDqKnApuA0aq6s4jt1wP/T1W/FZHLgbeAa3CeuloDnDeIbldhGRMa586do1KlSmH3QWl8U2Y3EvpLVfNEZCqwml8v490pIrOAr72MnSiu522p6g4RWQbsAHKBey1TGBM+Av2sKFP+BLUHUhasB2KMMSUXiB5Isc8oEJFKIvJAaQ5ijDEm8hSbQFQ1D4j8ywmMMcYPTz/9NNu3bw91GCHh69N4nwGqAEuBjPxym1DKGFORff7554waNYqtW7fSoEGDUIdTImX5OPf1XoptQiljTIV15swZOnTowDPPPMNNN90U6nBKrMwSSDizBGKMCYWpU6dy+vRpFixYEOpQ/FJml/GKSF1gBtDDVbQB+JuqnizNwY0xpjxat24d77//Plu3bg11KCHl60wxrwOncT4ddyTOu9HnBysoY4wJZ5mZmbz++utER0eHOpSQ8nUM5DtV7VhcWSjYKSxjjCm5MrkPxCVTRK73OHA3IGxmJTTGGFP2fH0WwRRgoWssBCANe4KuMcZUaMUmENesgpeoagcRqQOgqqeCHpkxxpiw5sud6A5cMwWq6ilLHsaYimbVqlXMmzcv1GGEHV/HQD4Tkf8VkRYiUj//J6iRGWNMGEhLS2Py5Mm0adMm1KGEHV+vwtrnpVhVNeS/UbsKyxgTTOPHj6dOnTr861//CnUoAVUmNxK6xkDGqOrG0hzIGGPKmxUrVrBx40a+//77UIcSlnztgXyrqp3KIJ4Ssx6IMSYYjh8/zlVXXcWSJUvo3r17qMMJuLK8D2StiNwqNnelMaaCyMzMZPr06RGZPALF1x7IaaAmkIfzBkLBOQZSJ7jhFc96IMYYU3Jl9jBFVa1dmoMYY4yJPD6dwhKnMSIy3bXcQkSuDm5oxhhjwpmvYyAvAtcBt7uWzwAvBCUiY4wJgYMHD5KXlxfqMMoVXxPINar6ByALQFXTgKpBi8oYY8rQwYMH6dq1K1988UWoQylXfE0guSJSCVAAEWkEOIIWlTHGlJGTJ09y4403MnXqVHr27BnqcMoVXxPIP4H3gMYiMge4IVrbAAAdTklEQVT4Avh70KIyxpgykJ2dzS233EJ8fDx//vOfQx1OuePznOgicinQB+clvGtVdWcwA/OVXcZrjPGHw+HgjjvuICcnh2XLllGpUqVQh1SmyuwyXgBV3QXsKs3BjDEmXJw9e5YmTZrw2GOPVbjkESg+90DClfVAjDGm5MryUSbGGGNMAUFPICIyQER2icgeEfmLl/V3i8gWEflWRD53jbUgInEiclZENrt+Xgx2rMYYY3wX1FNYrkfB78E5+J4CfA2Mco2n5G9TS1XPuF4PAe5V1YEiEgd8qKpXFXMMO4VljClWcnIysbGxVKlSJdShhIXycArrauAHVU1W1VxgCTDUc4P85OFSi4L3l9jTf40xpXbo0CF69OjB+vXrQx1KRAl2AmkG/OyxfNBVVoCI3Csie4HHgfs8VrUSkSQRWS8i1wc3VGNMJMq/UfDee++lX79+oQ4nooTFILqqvqiqbYG/ANNdxalAS1XtAvwJWCwitUIVozGm/MnJyWHYsGF0796dBx98MNThRByf7wPx0yGgpcdyc1dZUZYCLwOoag6Q43q9WUR+BNoBmwvvNHPmTPfr+Ph44uPjSxm2Maa8czgcTJw4kbp16/Lcc89R0efDS0xMJDExMaB1BnsQvRKwG+cgeiqwCRjteRe7iLRV1b2u10OA6ap6tYg0BE6oqkNE2gAbgCtVNb3QMWwQ3RhznszMTGbMmMGsWbO46KKLQh1O2AnEIHrQbyQUkQHAczhPl81T1cdFZBbwtap+JCLPAjfg7G2kAVNVdaeIDAP+5ip3AI+q6sde6rcEYowxJVQuEkiwWQIxxpiSKw+X8RpjjIlQlkCMMRFh3759ZGVlhTqMCsUSiDGm3EtJSaFXr16sXbs21KFUKJZAjDHl2qlTp7jxxhu5++67GTRoUKjDqVAidhC9VatWJCcnhyAiU17ExcWxf//+UIdhSiEnJ4cbb7yRdu3a8cILL1T4ez1Kwq7CougE4vrlhCAiU17Ye6R8U1XGjRvH6dOneeedd2xSqBKyq7CMMRVWbm4urVq1YvHixZY8QsR6IKbCsveIqcisB2KMMSZkLIGUY/fccw9z5swJ+LbGGOMLO4UVIq1bt2bevHn07t071KFUWOH+HjEF/fTTTzRq1IjatWuHOpSIYKewIlheXl6oQwgKbx/YJf0Qj9TfjSlaamoqffr0sRsFw4wlkBAYN24cBw4cYMiQIdSpU4enn36a5ORkoqKieP3114mLi6NPnz4AjBw5kpiYGKKjo4mPj2fHjh3ueiZOnMijjz4KwIYNG2jRogX/+Mc/aNKkCc2aNeONN97wa9sTJ04wZMgQ6tatyzXXXMP06dPp3r17ke356quv6NatG9HR0XTq1IkNGza41/Xq1YtHHnmE66+/npo1a7Jv3z6vZampqQwdOpQGDRrQrl075s6d665j1qxZjBgxgrFjx1KvXj0WLFhQqt+/KV/ybxScNGkSN998c6jDMZ5UtVz/OJtwvqLKw0WrVq103bp17uX9+/eriOj48eP17NmzmpWVpaqq8+fP14yMDM3JydEHHnhAO3bs6N5nwoQJOn36dFVVTUxM1MqVK+vMmTP13Llz+vHHH2uNGjU0PT29xNvedtttOnr0aM3KytIdO3ZoixYttHv37l7bcejQIW3QoIF+8sknqqr62WefaYMGDfSXX35RVdX4+HiNi4vTnTt3al5enubm5not69Gjh06dOlVzcnL0u+++00aNGun69etVVXXmzJlatWpVXbFihaqq+3dTWuH+HjGq2dnZ2rdvX7377rvV4XCEOpyI4nr/l+rzt0L3QGbOnImInPfjOcNhcdsXta0vtNCpGxFxT35TrVo1ACZMmECNGjWoUqUKjz76KN9//z2nT5/2Wl/VqlWZPn06lSpVYuDAgdSqVYvdu3eXaFuHw8G7777L3/72N6pVq8Zll13G+PHji2zDm2++yaBBg+jfvz8Affr04be//S0ff/zr1C0TJkzg0ksvJSoqisqVK59XdvjwYb788kueeOIJqlSpQocOHZg0aRILFy5013HdddcxZMgQAPfvxkQ2VWXy5MlUr16d559/3u4yD0MVPoF4y6oXSiC+buuv5s2bu187HA4eeugh2rZtS7169WjdujUiwi+//OJ13wYNGhAV9euftEaNGpw5c6ZE2x47doy8vLwCcbRo0aLIeJOTk1m2bBn169enfv36REdHs3HjRg4fPnzB/T3LUlJSqF+/PjVq1HCXxcXFcejQIa/bm4ohLy+Pdu3asWTJEvcXDxNe7K8SIkV9m/IsX7x4MR9++CHr1q2jZcuWnDx5kujo6KBeOdSoUSMqV67MwYMHadu2LQA///xzkdu3aNGCcePG8corrxS5jbe2epbFxsZy4sQJMjIyqFmzJgAHDhygWbNmF6zDRLbKlSszbdq0UIdhLqBC90BCqWnTpvz0008FygonhtOnT1OtWjWio6PJyMjg4YcfDvoHaVRUFMOGDWPmzJlkZmaya9euAqeSChszZgwffvghq1evxuFwkJWVxYYNG0hJSfH5mM2bN6dr1648/PDDZGdns2XLFubNm8fYsWMD0SRjTJBYAgmRhx56iNmzZ1O/fn3+8Y9/AOd/yx43bhwtW7akWbNmtG/fnq5du5boGCVJNp7b/utf/yI9PZ2YmBjGjx/P7bffXuS4Q/Pmzfnggw/4+9//TqNGjYiLi+Ppp5/G4XAUGYO3soSEBPbt20dsbCy33nors2fPplevXj7Hb4wpe3YjoSnWQw89xJEjR5g/f36oQwkoe4+El71797rH0kzw2Y2EJih2797N1q1bAdi0aRPz5s1j2LBhIY7KRLLDhw/Tt29fu1GwnLFBdHOe06dPM3r0aFJTU2nSpAl//vOf3ZfQGhNop0+f5sYbb+T3v/89I0aMCHU4pgTsFJapsOw9Enq5ubkMGTKEli1b8sorr9jVdmXIZiTEEojxn71HQktVmThxIsePH+e9996zez3KmI2BGGPKLYfDwZVXXmk3CpZj1gMxFZa9R0xFZj0QY4wxIWMJxBhjjF+CnkBEZICI7BKRPSLyFy/r7xaRLSLyrYh8LiKXeqx7WER+EJGdItIv2LGWB/lzeeRr3749n3/+uU/blpRNg2sCae/evQUesmnKv6AmEBGJAp4H+gNXAKM9E4TLW6p6lap2Ap4CnnHtezkwErgMGAi8KHaNH1DwUSDbtm2jR48ePm17IQsWLDhv0qiXXnrJHmZnAuLw4cP069ePdevWhToUE0DB7oFcDfygqsmqmgssAYZ6bqCqns8brwU4XK9vApao6jlV3Q/84KrPBIGqhuU1+N6mry3plLY2BW5onTlzhsGDB7ufq2YiR7ATSDPA81ngB11lBYjIvSKyF3gcuK+IfQ9527c8evLJJ8+74/b+++/nj3/8IwBvvPEGl19+OXXq1KFt27a8+uqrRdbVunVr97e6rKwsJkyYQP369Wnfvj1ff/11gW2feOIJ2rZtS506dWjfvj3vv/8+ALt27eKee+7hP//5D7Vr13Y/i8hzGlyA1157jYsvvpiGDRty8803k5qa6l4XFRXFK6+8Qrt27ahfvz5Tp04tMmZV5fHHH6dt27Y0atSIUaNGkZ6eDuB1at+ipvtdsWIF7du3p379+vTu3Ztdu3YV+L08+eSTdOjQgVq1arkf7mjKVm5uLiNGjKBTp04F3ksmQpR2SsML/QC3Aq96LI8B/nmB7UcBb7he/wu43WPdXGCYl30uNF1jWEpOTtaaNWvqmTNnVFU1Ly9PY2JidNOmTaqq+vHHH+u+fftUVfXzzz/XGjVq6LfffquqzuloW7Ro4a6rVatWunbtWlVV/ctf/qI9evTQ9PR0PXjwoLZv377AtsuXL9fDhw+rquqyZcu0Zs2a7uU33njjvGlrPafBXbt2rTZs2FC/++47zcnJ0f/5n//RHj16uLcVER0yZIieOnVKDxw4oI0aNdJPP/3Ua/ufffZZve666zQlJUVzcnJ0ypQpOnr0aFX1PrWvt7I9e/ZozZo1de3atXru3Dl98skntW3btpqbm+v+vXTq1EkPHTpU5BS44fweiQQOh0MnTJigN954o/vvYsIHAZjSNth37xwCWnosN3eVFWUp8LLHvp4jwEXu6zkrYHx8PPHx8T4FN2vWLJ+2K86MGTNKtH3Lli3p3Lkz7733HmPGjGHt2rXUrFmT3/3udwAMHDjQvW337t3p168f//73v+nYseMF63377bd5+eWXqVu3LnXr1uW+++5j9uzZ7vW33nqr+/WIESP4+9//zqZNm3x6ztXixYu588476dChAwCPPfYY0dHRHDhwgJYtnX/ihx9+mNq1a1O7dm169erFd999R79+51/78Morr/DCCy8QExMDwKOPPkpcXBxvvvkmUHBq33yFy5YuXcrgwYPp3bs3AP/7v//Lc889x5dffukeE7r//vuJjY0ttm0meH73u98xbtw4u1EwDCQmJpKYmBjQOoP9V/0aaCsicUAqzh7GaM8NRKStqu51LQ4G9rherwDeEpFncJ66agts8nYQf6eVLekHfyCNHj2ahIQExowZQ0JCQoFzw6tWreJvf/sbe/bsweFwkJmZyVVXXVVsnSkpKQWmoo2LiyuwfuHChTzzzDPs378fgIyMjCKnx/VWd5cuXdzLNWvWpEGDBhw6dMidQJo0aeJef6HpdJOTk7nlllvcU+qqKlWqVOHIkSPubTzb4a0sJSWlQPtEhBYtWhSYBtdbHabsiAj33ntvqMMwLoW/XAfiC3RQx0BUNQ+YCqwGtuMcFN8pIrNEZLBrs6kisk1ENgN/BMa79t0BLAN2AB8D97q6XRFhxIgRJCYmcujQId577z13AsnJyWH48OE8+OCDHDt2jLS0NAYOHOjTHdMxMTEFpp9NTk52vz5w4AB33XUXL774ImlpaaSlpXHFFVe46y1uAD02NrZAfRkZGRw/ftyvD+mWLVuyatUqTpw4wYkTJ0hLSyMjI8PdIykqnsLT4HrGA86pdz3jCceLAoyJJEG/D0RVP1HVS1T1YlV93FU2Q1U/cr3+o6q2V9XOqtpHVXd67PuYqrZV1ctUdXWwYy1LDRs2pGfPnkycOJE2bdpwySWXAM4EkpOTQ8OGDYmKimLVqlWsXu1b00eOHMljjz1Geno6Bw8e5Pnnn3evy8jIICoqioYNG+JwOJg/fz7btm1zr2/SpAkHDx4kNzfXa92jR49m/vz5bNmyhezsbP76179y7bXX+nWfyd13381f//pXDhw4AMCxY8dYsWKFe723ZFm4bOTIkaxcuZL169dz7tw5nn76aapXr851111X4niMMf6xO9FD6Pbbb2ft2rXccccd7rJatWrxz3/+kxEjRlC/fn2WLFnC0KFDi6zD81v2jBkzaNmyJa1bt2bAgAGMGzfOve6yyy7jT3/6E9deey1NmzZl+/btXH/99e71vXv35oorrqBp06Y0btz4vOP06dOH2bNnM2zYMJo1a8a+fftYsmSJ1zi8LXu6//77GTp0KP369aNu3bp07dqVTZs2XXDfwmXt2rXjzTffZOrUqTRq1IiVK1fy4Ycfus+1W++j7O3du9f9pcBUDPYwRVNh2XskcI4cOULXrl2ZMWNGgS8uJnzZwxSNMSF35swZBg0axJgxYyx5VDDWAzEVlr1HSi83N5ehQ4cSExPD3Llz7dRhOWI9EGNMyKgqU6ZMAeDll1+25FEB2d09xhi/de/eneHDh1OlSpVQh2JCwE5hmQrL3iOmIrNTWMaYoNu5cydz5syxB1Ka81gCMcac58SJE7z44otcc8019OnTh5MnT5KZmRnqsEyYsVNYpsKy94h3jz76KP/85z8ZMGAA48ePp2/fvvYwxAgUiFNYlkBMhWXvEe+2b99ObGws0dHRoQ7FBJGNgZRjnhNBlYa3qWiNKc6xY8f4/PPPva674oorLHkYn1S4BLJy5Ur37Hf50tPTWblyZZnWESga4KlobQrZyJWTk8P777/PzTffzMUXX8y7774b6pBMeVfaGalC/UMJZyRMS0vTe++9V9PS0rwu+6K0dYwdO1ajoqK0Ro0aWrt2bX3qqadUVfU///mPdu3aVevVq6cdO3bUxMRE9z7z58/XNm3aaO3atbVNmza6ePFi3blzp1avXl0rV66stWrV0ujoaK/HO3nypN55550aExOjzZs310ceeUQdDoeqOmci7Natmz7wwAPaoEEDnT59utcyh8Ohs2fP1ri4OG3SpImOHz9eT548qaq/ziI4b948bdmypfbs2dPn32UoFfUeiTQOh0P/+Mc/aqNGjbR79+46b94899/OVFwEYEbCkCeAUjfAjylt8z/w9+3bV+LkEag6WrVqpevWrXMvHzp0SBs0aKCffPKJqqp+9tln2qBBA/3ll180IyND69Spoz/88IOqqh4+fFh37Nihqt6noi3s5ptv1nvuuUczMzP12LFjes011+irr77q3r9y5cr6wgsvaF5enmZlZXktmzdvnl588cW6f/9+zcjI0GHDhunYsWNV1fs0tOVBRUkgqs4vIHv37g11GCaMWALxM4Goqu7bt08B99zj/ihNHZ5zmauqPvHEEzpu3LgC2/Tv318XLlyoGRkZGh0dre+++65mZmYW2Ka4BHLkyBGtVq1agQ/1hIQE7dWrl3v/uLi48+osXNanTx996aWX3Mu7d+/WKlWqaF5enu7fv1+joqJ0//79PrU9XERaAsnMzNSjR4+GOgxTTgQigVS4MRBwjlc89dRT7Nu3j6eeeuq88YyyqsNTcnIyy5Yto379+tSvX5/o6Gg2btxIamoqNWrUYOnSpbz00kvExMQwZMgQdu/e7XO9ubm5xMTEuOudMmVKgalsvU0KVbis8BSycXFxnDt3rthpaE1wqSqbNm3i3nvvpVmzZsyfPz/UIZkKpMIlkPT0dKZNm8acOXNo1aoVc+bMYdq0aSVKAIGoo/DAd4sWLRg3blyBaV5Pnz7Ngw8+CEDfvn1ZvXo1hw8f5pJLLuGuu+7yWk9hLVq0oHr16hw/ftxdb3p6Olu2bCkyFm9lhaeQTU5OpkqVKgXmQbeH6ZWd9PR0nnjiCS6//HJuv/12YmNj2bx5s/v9YkxZqHAJZOPGjcyZM4d69eoBUK9ePebMmcPGjRvLtI6mTZvy008/uZfHjBnDhx9+yOrVq3E4HGRlZbFhwwZSUlI4evQoK1as4OzZs1SpUoVatWoRFeX80xU3FW3Tpk3p168fDzzwAKdPn0ZV+emnn4q8hLMoo0eP5plnnmH//v2cOXOGadOmMWrUKHcczh6xKSt5eXn8+OOPzJ07lx9++IFHHnmkQA/RmDJR2nNgof7BzzGQUPvggw+0ZcuWGh0drf/3f/+nqqqbNm3Snj17av369bVx48Y6ePBg/fnnnzU1NVV79uyp9erV0+joaO3Vq5fu3LlTVVVzcnJ08ODBWr9+fW3UqJHXY506dUrvuecebd68udarV087d+6sS5cuVVXvYyjeyvKvwmrRooU2btxYx40bp+np6aqq7jGQvLy8gP6Ogi3c3yOqzt97efu9mvKBAIyB2J3opsIK5/dIcnIyixYtYsGCBTz//PP0798/1CGZCGN3ohsTQTIyMli4cCF9+vShc+fOpKSk8NZbb9GvX79Qh2aMV/aENGPCxIcffsiyZcuYMmUKQ4YMoXr16qEOyZgLslNYpsKy94ipyOwUljHlyKlTp3j99dcZMGAAGRkZoQ7HmFKzBGJMEOXl5fHZZ58xduxYWrZsyYoVK5gyZQpVq1YNdWjGlJqdwjIVloiQmZnJ2bNnycnJITs7m5ycHHJycmjcuDGNGjU6b5+kpCR27Nhx3va9e/fmt7/97XnbT548maSkJCZMmMDo0aO91mlMKATiFFZEDKJ/8cUX55W1aNHC7ow2FxQXF8fMmTN59dVXqVq1KtWqVaNq1apUrVqVhx9+mDFjxpy3z/bt21mzZo17u/z9srOzvR7jueeeo0aNGsFuijEhEfQeiIgMAJ7Febpsnqo+UWj9A8AkIBc4BvxeVX92rcsDvgcESFbVm73Ur926dTvvuH/4wx8YPXr0eeXPP/88S5YsKdfbL1261Ov2o0aN8rr9smXLziu/9957vW7/wgsvFLn9bbfddl75iy++6HX7e+65p8jt3377bfdyVFQU1apV46677uLmm8/787Jy5Uq++uqr8z6wu3XrRocOHc7b/qeffuLIkSPnJYQGDRpQu3bt87Y3pqIK+yltRSQK2AP0AVKAr4FRqrrLY5uewH9VNUtEpgDxqjrKte6UqtYp5hheT2FFisTEROLj40MdRtBY+8q3SG5fJLcNysdVWFcDP6hqsqrmAkuAoZ4bqOoGVc1yLX4FNPNYXeHPQSUmJoY6hKCy9pVvkdy+SG5boAQ7gTQDfvZYPkjBBFHYncAqj+VqIrJJRL4UkaFF7VRavr5RLrSdt3W+lHkuF/W6tELVvqLaGuj/mGXVvlD87Xytr6Rt81YeSe9Nb+WR1L5w+WwJm8t4RWQM0AV4yqM4TlWvBu4AnhWR1sE4dqT/kS2BFL+dJZDIem96K4+k9oXLZ0uwx0CuBWaq6gDX8kM4nwBZeCD9BuA5oIeqHi+irvnAh6r6bqHyyB0AMcaYIAr3QfRKwG6cg+ipwCZgtKru9NimE/A20F9Vf/QorwecVdUcEWkIbASGeg7AG2OMCZ2g3geiqnkiMhVYza+X8e4UkVnA16r6EfAkUBN4W5w3buRfrnsZ8IrrUt4o4DFLHsYYEz7K/Z3oxhhjQiNsBtGNMcaUL5ZAjDHG+CViE4iIDBWRV0UkQUT6hjqeQBKR1iIyV0TOf4ZIOSciNUTkDRF5RURuD3U8gRbJfzuI7P93ACJyqYi8JCLLXE/OiDiu/4Nfi8iNxW4b6WMgrqu5nlLVyaGOJdBEZJmqjgx1HIHkuh8oTVVXisiS/MfaRJpI/Nt5iuT/dwCuC34WqOq4UMcSaK6LnE4DO1T14wttG/Y9EBGZJyJHRGRLofIBIrJLRPaIyF8uUMUjwAvBjdI/AWhb2POjjc359ekFeWUWqJ8i/W9YivaF7f87T/60T0SGAB8BF/xwDQclbZ/rnrwdOB9sW/w9Iqoa1j/A9UBHYItHWRSwF4gDqgDfAZe61o0F/gHEAo8DvUPdhiC0Lca1/Hao2xCENt4B3Oh6vTjU8Qe6fR7bhP3fzt/2hfv/u0D8/VzbfRTq+APdPuD/c33GfAq8V1z9Yd8DUdUvgLRCxUU+pFFVF6nq/wNuxXkD43ARuassY/ZVKdqWLSIvAR3D/dttSdsIvIfzb/YC8GHZReqfkrZPROqXl78d+NW+/yHM/9958qN9PUXkORF5GVhZttGWnB+fMY+4PmPeAl4rrv7yOqGUt4c0Xu25gar+C/hXWQYVIL607QRwT1kGFWBFtlFVzwK/D0VQAXSh9pX3vx1cuH3l9f+dpwu1bwOwIRRBBZAvnzELfako7HsgxhhjwlN5TSCHgJYey81dZZEgktuWL9LbaO0r36x9PiovCUQoeEXA10BbEYkTkarAKGBFSCIrvUhuW75Ib6O1z9oXzoLXvlBfJeDDVQSLcU6Hmw0cACa6ygfifNLvD8BDoY7T2lYx22jts/aF80+w2xfxNxIaY4wJjvJyCssYY0yYsQRijDHGL5ZAjDHG+MUSiDHGGL9YAjHGGOMXSyDGGGP8YgnEGGOMXyyBGGOM8YslEGOCQETqicj9oY7DmGCyBGJMEKhqOlBfRAaHOhZjgsUSiDHBsxwYHuogjAkWSyDGBImqbgUuE5FKoY7FmGCwBGJMcKUCvUMdhDHBYAnEmCBxDaIvwk5jmQhlCcSYIBCR+4CdqvoO0EVEpLh9jClvLIEYE2AiMhCoraqrXUUbgPjQRWRMcNiEUsYEmIhUU9Vsj+WqQHVVPRXCsIwJOEsgxhhj/GKnsIwxxvjFEogxxhi/WAIxxhjjF0sgxhhj/GIJxBhjjF8sgRhjjPGLJRBjjDF++f8B4C1CAv3rBuAAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f8f1b7414e0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.xticks([])\n",
    "plt.xlabel(r\"$\\lambda$\")\n",
    "plt.ylabel(r\"error (MSE)\")\n",
    "plt.title(r\"Validation Pipeline\")\n",
    "plt.xscale('log')\n",
    "plt.plot(ls, errorTrain, color='k', linestyle='--', label='training error')\n",
    "plt.plot(ls, errorValid, color='grey',zorder=4,label=\"validation error\")\n",
    "plt.plot([bestLamb], [MSEtest], linestyle='', marker='x', color='k', label=\"test error\")\n",
    "plt.legend(loc='lower left')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Precision, recall, and ROC curves"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Same data as pipeline above, slightly bigger dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = list(parseData(dataDir + \"beer_50000.json\"))[:10000]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Simple bag-of-words model (as in pipeline above, and in Chapter 8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "wordCount = defaultdict(int)\n",
    "punctuation = set(string.punctuation)\n",
    "for d in data:\n",
    "    r = ''.join([c for c in d['review/text'].lower() if not c in punctuation])\n",
    "    for w in r.split():\n",
    "        wordCount[w] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "counts = [(wordCount[w], w) for w in wordCount]\n",
    "counts.sort()\n",
    "counts.reverse()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "words = [x[1] for x in counts[:1000]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "wordId = dict(zip(words, range(len(words))))\n",
    "wordSet = set(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "def feature(datum):\n",
    "    feat = [0]*len(words)\n",
    "    r = ''.join([c for c in datum['review/text'].lower() if not c in punctuation])\n",
    "    ws = r.split()\n",
    "    for w in ws:\n",
    "        if w in words:\n",
    "            feat[wordId[w]] += 1\n",
    "    feat.append(1) # offset\n",
    "    return feat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Predict whether the ABV is above 6.7 (roughly, above average) from the review text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "random.shuffle(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = [feature(d) for d in data]\n",
    "y = [d['beer/ABV'] > 6.7 for d in data]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Train on first 9000 reviews, test on last 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "mod = sklearn.linear_model.LogisticRegression(max_iter=5000)\n",
    "mod.fit(X[:9000],y[:9000])\n",
    "predictions = mod.predict(X[9000:]) # Binary vector of predictions\n",
    "correct = predictions == y[9000:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.832"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(correct) / len(correct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To compute precision and recall, we want the output probabilities (or scores) rather than the predicted labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "probs = mod.predict_proba(X[9000:]) # could also use mod.decision_function"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Build a simple data structure that contains the score, the predicted label, and the actual label (on the test set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "probY = list(zip([p[1] for p in probs], [p[1] > 0.5 for p in probs], y[9000:]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0.9999998757375738, True, True),\n",
       " (0.012730778147385805, False, False),\n",
       " (0.9970704663915445, True, True),\n",
       " (0.00011250748923901674, False, False),\n",
       " (0.9506151739427136, True, True),\n",
       " (0.8510164232115964, True, False),\n",
       " (0.9507137179403021, True, True),\n",
       " (0.001979864980773123, False, True),\n",
       " (0.9035933864763247, True, True),\n",
       " (0.08471416399359076, False, True)]"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "probY[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sort this so that the most confident predictions come first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "probY.sort(reverse=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(1.0, True, True),\n",
       " (0.9999999999999885, True, True),\n",
       " (0.9999999999999716, True, True),\n",
       " (0.9999999999999376, True, True),\n",
       " (0.9999999999998979, True, True),\n",
       " (0.9999999999980089, True, True),\n",
       " (0.9999999999967666, True, True),\n",
       " (0.9999999999824809, True, True),\n",
       " (0.9999999997863847, True, True),\n",
       " (0.9999999997671105, True, True)]"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "probY[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Receiver operator characteristic (ROC) curve"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "xROC = []\n",
    "yROC = []\n",
    "\n",
    "for thresh in numpy.arange(0,1.01,0.01):\n",
    "    preds = [x[0] > thresh for x in probY]\n",
    "    labs = [x[2] for x in probY]\n",
    "    if len(labs) == 0:\n",
    "        continue\n",
    "    \n",
    "    TP = sum([(a and b) for (a,b) in zip(preds,labs)])\n",
    "    FP = sum([(a and not b) for (a,b) in zip(preds,labs)])\n",
    "    TN = sum([(not a and not b) for (a,b) in zip(preds,labs)])\n",
    "    FN = sum([(not a and b) for (a,b) in zip(preds,labs)])\n",
    "       \n",
    "    TPR = TP / (TP + FN) # True positive rate\n",
    "    FPR = FP / (TN + FP) # False positive rate\n",
    "    xROC.append(FPR)\n",
    "    yROC.append(TPR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEZCAYAAACNebLAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcVOWV//HPAdkacUGiBFwARRFBUDI0IfmZdqMxoyEjMWLUuCVGHc1MxoyaTVCjYzQTjXuixkRRUURBHdOAmpagbAFZNDRbFAGBCCiytQJ9fn/c21DdVHXfbqrq1vJ9v179ou6tp24d7qu7Tp3nufd5zN0RERGpr0XcAYiISG5SghARkaSUIEREJCklCBERSUoJQkREklKCEBGRpJQgREQkKSUIKQhm9r6ZbTWzT83sQzN7zMxK6rUZbGavhW0+NrMJZnZsvTYdzOxuM1setltiZr8xs44NvPcPzWyBmW02sw/M7BkzOy5T/1eRbFGCkELhwL+6+35Af+AE4Ce1T5rZl4GJwAvAF4HuwHzgTTPrFrZpBbwOHAsMCY/1ZWA9MDDZm5rZPcA1wNXAgcDRwHjgX5v6HzCzlk19jUgmme6klkJgZu8Bl7n76+H2r4De7n5WuD0FmOfu19R73SvAP939YjP7HnAL0MPdt0V4z6OAKqDU3WenaPMX4Al3/0O4fRHwPXf/f+F2DUFy+U+gJUES2+Lu/51wjPFApbvfbWZfBO4FTgI2AXe7+73RzpJI06iCkIJjZocCZwBLwu12wGDguSTNnwVODx+fClRESQ4J7VekSg4NqP+tbBjwL0Bv4Gng27VPmNkBwBDgaTMz4CXgbYIq6FTgP8zsdEQyQAlCCsl4M/sU+ABYC4wK93ck+F1fneQ1q4FO4eODUrRJpantU7nN3Te6+2fu/lfAzeyr4XPfAt5y97UE3Vyd3P1Wd9/p7u8DjwAj0hCDyB6UIKSQDAvHDb4G9GL3B//HQA3Bt+76vgisCx+vT9Emlaa2T2Vlve1ngPPCx98BngwfHw50NbMN4c/HBOMsB6chBpE9KEFIITGA8Fv4n4D/Dbe3AtOAc5K85tvAq+HjV4HysEsqiteAQ83sxAbabAESr6bqnKRN/S6np4FvmdnhQCkwLty/AviHu3cMfw509/1rx1lE0k0JQgrV3cDpZtY33L4BuMjMrjazfc3sQDP7JTAIuDls8wTBh/A4MzvGAgeZ2U/MbGj9N3D3pcADBOMDXzOzVmbWxszONbPrwmZzgbPNrF04qH1ZY4G7+1yC6uQRgjGRT8OnZgKbzOw6M2trZi3N7Dgz+1JzTpBIY5QgpFDU+Rbu7usIqogbw+03gXJgOMG4wXtAP+Ar7r4sbPM5cBrBlUmTgY3AdIKxhhlJ39T9P4D7gPsJurKWAt8kGEwGuAvYDqwBHgNGNxR3gqcIBqGf3NXQvQY4k+Ay3veAfwIPA/ulOIbIXsnoZa5m9ijBL/Radz8+RZt7CK442QJcHH57EhGRmGW6gniM4FtbUmZ2BnCku/cEfgA8lOF4REQkoowmCHefSlB2pzIMeDxsOwPY38wOyWRMIiISTdxjEF0JBgVrrQr3iYhIzOJOECIikqP2ifn9VwGHJWwfGu7bg5lp0igRkWZwd2vO67KRICz8SeZF4N+BZ8xsEPBJOKVAUppYMDBq1ChGjRoVdxg5QediN50L2LFjB1u2bOHmm2/miiuuYOvWrWzZsqXOT9R99fdv3bqVli1bUlJSQvv27ff4Sba/KW3btWtHixZ716lTU1PDrFmz2LlzJ6WlpbRs2ZJgCq/myWiCMLOngDLgIDP7ABgJtAbc3X/v7q+Y2dfNbCnBZa6XZDIeEUmf7du38/HHH7NhwwY2bNiw63F1dTXbt29nx44dSX9SPdec11RXV9f5IN+xYwft27enpqaG8ePHR/pwPvjggyN9kJeUlNCqVau4T3tK69atY+bMmQwYMIBDDknPtT4ZTRDu/p0Iba7OZAwi0rDNmzezfv36Oh/yUR5XV1dz4IEH0rFjRzp27MiBBx7IgQceSElJCfvss8+un1atWtV53K5duzrP12+T6idZm9atW9f5QG/Tpg1mVlTVVGLVUF5eTsuW6VtWJO4xCGmGsrKyuEPIGToXuyWei23btrF27VrWrl3LmjVrWLNmza7H9ffV1NTQqVOnOh/0iY+PPPLIpM/tu+++e9V9kUnF8nuRiaohUd4sGGRmni+xSuHbvn07ixYtYsGCBcyfP5933nmHTZs2xRrTjh07+Oijj1izZg3V1dUccsghdO7cede/qR7n8ge9JJdsrCEVM2v2ILUShEgSNTU1bNiwoc437RUrVrBgwQIWLFjAkiVLOOKII+jbty99+/alT58+dOyYctnqrGjRogVf+MIX6Ny5MwcccIA+9AtUU6sGJQiRZti5cyezZ89m8uTJLF26tE4XzEcffUSHDh3qfOPu0qULffr0oW/fvvTu3Zt27aLOCi6y95pSNSRSghCJaOXKlUyePJmJEyfy6quvcsghhzBkyBD69OlTp9vl4IMPpnXr1nGHKwLs3ViDEoRIAnfn448/3lURrFq1ijfffJPXXnuNDRs2cMopp1BeXs6QIUM47LDDGj+gSEyaWzUkUoKQouTurFixgnnz5tX5Wb58Oe3atdtVEXzxi1+ktLSUU045hb59++71zUgi2ZCuK5SUIKSozJo1i7Fjx/L888+zefNm+vfvT79+/Xb926NHD9q2bRt3mCLNko6qIdHeJAjdByF5Y82aNVx99dXMnj2b888/n7Fjx9K/f39drSMFI9P3NTSVEoTkrB07drBy5Uree+893n77bW6//Xa+//3vM3r0aFUIUlAyeTf03lCCkJzg7lRVVTF9+nRmzJjBjBkzWLhwIQcffDDdu3enR48eTJo0if79+8cdqkha5VrVkEhjEBIrd+eVV15h1KhRrF27lq9+9auUlpZSWlpK//79VSlIwUr3WEMqGoOQvOPuVFRUMGrUKLZs2cLIkSMZPny4rjCSopDLVUMiVRCSVe7OxIkTGTVqFJs2bWLkyJF861vfUmKQopCtqiGRKgjJSe7Ou+++y9y5c+vcp9CpUyduvPFGzjnnHCUGKRr5UjUkUgUhGfH666/z85//nFWrVjFo0CD69eu3616FLl266NJUKRpxVA2JVEFI1m3evJnZs2fz/vvv19m/c+dORo8ezYoVK7jppps499xzc+aSPZFsy8eqIZEqCIls7ty5PPDAA8yYMYOlS5fSt29fevbsuUc30UknncR3v/vdnF6eUSST4q4aEmmqDdkr7s7mzZtZu3Yt27ZtS9pm+fLlXHrppfz4xz/m5JNPpl+/fprtVCSJXKsa1MUkTbJkyRL+8Ic/8MYbb+xa/wCgc+fOlJSUJH3NPvvswxNPPEF5eXk2QxXJG7l6N/TeUAVRRF5++WXuvPNOqqqquPDCCznrrLPo2rXrrmUnRaR5cq1qSKQuJmnQRx99xLPPPsstt9zC/fffz1lnnaXuIZE0yKWxhlTUxSR7WLVqFS+88ALPP/88s2fPpry8nAkTJlBaWhp3aCIFIZerhnRRBVFg/v73v3PVVVcxf/58zjzzTM4++2zKy8u1frJImuRD1ZBIFYQAQVfS0KFDueGGG5g0aZK6kUTSrBiqhkSqIApE7ZUTAwcO5Lbbbos7HJGCkm9VQyINUhehuXPnctttt7F161YA1q9fT0lJCZMmTcqrX16RXJfvVYO6mIpIdXU1N998M4888gi/+MUv6N69+67nTjrpJCUHkTQpxPsamkoJIofNmzeP5557btfEduvXr2f8+PF8+ctfZv78+XTu3DnmCEUKU75XDemiLqYctXr1agYMGEC3bt0YOnQoAO3bt+eMM86gd+/eMUcnUpjyeawhFXUxFZAVK1YwbNgw5s6dyznnnMOYMWM0NbZIFqhq2JMSRI555ZVXaNOmDdu2baNNmzZxhyNS8DTWkJoSRA7YvHkzL730Em+88QYTJkygoqJCyUEkC1Q1NEwJImbjxo3jmmuuoX///pSVlTFlyhR69uwZd1giBU1VQzRKEFm2bt06Jk6cCATJYeHChYwdO5avfOUrMUcmUhxUNUSnBJElixYtYty4cYwbN45OnTrRqVMnBgwYwFNPPUXbtm3jDk+k4KlqaLqMX+ZqZkOBu4EWwKPu/qt6zx8G/Ak4IGzzE3f/c5Lj5OVlrtOmTeOPf/wjFRUVfP3rX6dXr15cc801eyzTKSKZU8xVQ85OtWFmLYDFwKnAh8AsYIS7VyW0+R0wx91/Z2bHAq+4e/ckx8rLBHHJJZcAMHz4cM4888yYoxEpLoV4X0NT5fJ9EAOBJe6+HMDMxgDDgKqENjXAfuHjA4BVGY4pq6ZNm8aYMWPo379/3KGIFJVirhrSJdMJoiuwImF7JUHSSHQTMMnMfgiUAKdlOKaMmjJlChUVFUDw7eXDDz+kT58+MUclUjw01pA+uTBIfR7wmLvfZWaDgNHAcckajho1atfjsrIyysrKshFfZIsWLWL48OFcddVVu+5jePjhh9lnn1w4zSKFT1UDVFZWUllZmZZjZXoMYhAwyt2Hhts3AJ44UG1m7wDl7r4q3F4GlLr7unrHytkxiOrqah588EHuvfdebrjhBi6//PK4QxIpKhprSC2XxyBmAUeZ2RHAamAEQcWQaDlBt9KfwkHqNvWTQ6578MEHGTt2LA899BCnn3563OGIFBVVDZmTrctcf8vuy1xvN7ObgFnu/nKYFB4G9iUYsP5vd38tyXFysoJwd/r06cODDz7ISSedFHc4IkVDVUM0OXuZazrlaoJ4/vnnueWWW5gzZ45mXRXJElUN0eVyF1PBcnfuvPNO7rzzTp599lklB5Es0BVK2aUE0Qw1NTVceeWVzJs3j1mzZtGtW7e4QxIpeKoask8JohkmTJjA9OnTmTp1Kh06dIg7HJGCpqohPkoQzbBlyxb69u2r5CCSYaoa4qUE0QwrV66kVatWcYchUrBUNeQGXcXURJ9//jkHHXQQM2fO5Nhjj407HJGCo6ohvXQVU5Z8+umn/PSnP6VLly5KDiJppqoh92hRggg2bNjAyJEj6dGjB5999hnTp0+POySRgrJu3ToqKiro1q0bgwcPVnLIEaogGrBy5UqeeOIJfv3rX3P22Wczffp0jjrqqLjDEikYqhpymxJEEuPGjeOOO+5g6dKlnHXWWUybNo2jjz467rBECorGGnKfBqkTuDu33XYbv//973nooYc47bTTdLWSSJppDqXs0iB1mvz2t7/lueeeY9q0aXTp0iXucEQKjqqG/KIEkWDJkiVcfPHFSg4iaaaxhvykBBFatGgRY8eOZfLkyXGHIlJQVDXkLyUIgqkzhg8fzi9/+Uv69esXdzgiBUFVQ/4r+kFqd+eCCy6gVatWPPbYY5q2WyQNVDXkDg1S74WpU6cyc+ZM5s2bp+QgspdUNRSWok8QmzZtomfPnpSUlMQdikheU9VQeCIlCDNrDRzu7kszHI+I5BlVDYWr0bmYzOxfgQXA5HC7v5m9kOnARCT3aQ6lwhalgrgZKAX+AuDuc82sYCYkWrx4sRb+EWkiVQ3FIUqC2O7un9QbwM2PS58asXbtWm699VZef/31uEMRyRsaaygeURLEQjP7NtDCzLoDPwQKYr7rG2+8kUsuuYS+ffvGHYpIzlPVUHwavQ/CzNoDNwJDwl0TgZvcfVuGY6sfR1rvg9ixYwedO3dmzpw5HH744Wk7rkghUtWQvzJ9H0S5u18PXJ/whmcDzzfnDXPFtGnTOPzww5UcRBqgqqG4RVlR7udJ9v0s3YFk29q1a+nRo0fcYYjkLF2hJCkrCDMrB4YCXc3sNwlP7QfUZDqwTJs6daoShEgSqhqkVkNdTP8E3gGqgXcT9m8CbshkUJk2ZcoUnnzySRYsWBB3KCI5RWMNkijKIHVbd6/OUjwNxZGWQeo5c+YwdOhQnnrqKU477bQ0RCaS/7TKW+Ham0HqKAniSOBWoDfQtna/u2d1keZ0JIhNmzZxzDHHcN9993H22WenKTKR/KaqobBl+iqmPwK/BH4NnAFcQp7eKFdRUcHxxx+v5CCCxhqkcVGuYipx94kA7r7M3X9OkCjyzksvvcSwYcPiDkMkdrpCSaKIUkF8ZmYtgGVmdgWwCsi7yYsmT57Miy++yE9+8pO4QxGJjaoGaYooCeJHQHuCKTZuBfYHLs1kUOn2+OOPc9111/Hiiy9y7LHHxh2OSCw01iBN1awlR82sq7uvykA8Db1nswapV69ezXHHHcdbb71Fr169MhCZSG7TFUrFbW8GqRscgzCzfzGzb5pZp3D7ODN7HJjRhOCGmlmVmS02s+tTtPm2mb1rZgvMbHST/gcN+OSTTxgxYgSXX365koMUJY01yN5IWUGY2f8Aw4F5QHfgZeAq4FfAg+6+tdGDB2MXi4FTgQ+BWcAId69KaHMU8Axwsrt/amad3H1dkmM1qYL48MMPGTp0KCeffDJ33XUXLVpEGY8XKQyqGqRWpi5zHQb0c/dtZtYRWAH0dfd/NOH4A4El7r48DHRMeNyqhDbfB+53908BkiWHpqqqqmLo0KFcccUVXH/99dRby0KkoGmsQdKloQRRXTult7tvMLPFTUwOAF0JEkutlQRJI9HRAGY2laDL66bay2qbw90544wz+MUvfsFll13W3MOI5B1doSTp1lCC6GFmtVN6G9A9YRt3T9fdZvsARwEnAYcDU8ysT21F0VSzZ8+mdevWXHppXl1oJbJXVDVIJjSUIIbX276vGcdfRfChX+vQcF+ilcB0d68B3jezxUBPYHb9g40aNWrX47KyMsrKyvZ4w4qKCs4880x1K0lRUNUg9VVWVlJZWZmWYzXrMtfIBzdrCSwiGKReDcwEznP3hQltysN9F4dXS80G+rv7x/WOFWmQ+rrrrqNTp05cd911afyfiOQeVQ0SRabnYmo2d99pZlcDkwjGFx5194VmdhMwy91fdveJZjbEzN4FdgA/rp8cRGQ3VQ2SLRlNEADuXgEcU2/fyHrb1wLXpuP9du7cqe4lKViqGiSbIicIM2vj7p9lMph0WLZsGYMGDYo7DJG0UtUgcWj07jEzG2hmC4Al4XY/M7s345E107x58+jXr1/cYYikje6GlrhEWTBoOnAuMN7dTwj3vePufbIQX2IcjQ5Sb9y4ka5du7Jx40b9EUne093Qkg6ZHqRu4e7L6/Xr72zOm2XaggULOO644/SHJHlPYw2SC6IkiBVmNhDw8LLVawjmV8o58+fPV/eS5DWNNUguiZIgrgTuIbjhbS3wargv58ybN4/jjz8+7jBEmkVVg+SaKAlih7uPyHgkaTBv3jwuuOCCuMMQaRJVDZKroiSIWWa2iGBK7ufdfVOGY2q2Dz74gO7du8cdhkhkqhoklzWaINz9SDMbDIwAbjKzucAYdx+T8eiaQes+SD5Q1SD5INKnqbu/5e4/BE4EPgWezGhUIgVM9zVIvmi0gjCzfQkW+RkBHAtMAAZnOK4m27hxI1u2bKFDhw5xhyKSlKoGyTdRxiDeAV4C7nD3v2Y4nmZ7+umnGTJkiBKE5CSNNUg+ipIgeoRrNeS0Rx99lFtuuSXuMETqUNUg+SxlgjCz/w1nWR1nZnvMcZHGFeX22vz581mzZg2nn3563KGI7KKqQfJdQxXEM+G/zVlJLqseeeQRLrnkEn07k5ygqkEKRZTJ+q529/sa25dpqSbrq66u5tBDD2XWrFm6B0Jip6pBcs3eTNYX5TLXS5Psu6w5b5YJ48ePp3///koOEquamhpmzJjB4sWLKS8vV3KQgtDQGMS5BJe2djez5xOe6gB8kunAonr00Uf53ve+F3cYUsRUNUihStnFZGbdgSOB/wFuSHhqE/C2u2/PfHh14tmji2n58uUMGDCAlStX0rZt22yGI6L1GiQvZGQ9CHd/D3iPYPbWnFRZWUl5ebmSg2SdqgYpBg11Mb3h7l8zs4+BxK/uBri7d8x4dI2oqqqid+/ecYchRURXKEkxaegy15PDfztlI5DmqKqq4vzzz487DCkSqhqk2DTUxVR79/RhwIfu/rmZfRU4HhhNMGlfrKqqqujVq1fcYUiBU9UgxSrKfRBzgX8hWFGuAngZ6OnuZ2Y+vDpx1Bmk3r59Ox06dOCTTz7RGIRkjKoGyXcZGaROUOPu283sbOBed7/HzN5uzpul0/vvv0+XLl2UHCQjVDWIRFxy1MzOAS4Evhnua5W5kKJZvHgxRx99dNxhSAFS1SASiJIgLgWuIpju+x/h/RFPZzasxi1atIhjjjkm7jCkgKhqEKkrypKj75jZD4GjzKwXsNTdb818aA1bvHgxffv2jTsMKRCqGkT2FGVFuf8HPAGsIrgHorOZXejub2Y6uIYsXryY4cOHxxmCFABVDSKpReliugv4urv/HcDMjiVIGF/KZGCN0RiE7C1VDSINi5IgWtcmBwB3X2hmrTMYU6M2b97Mhg0bOOyww+IMQ/KUqgaRaKIkiDlm9hDBzXEA5wOxXua6ZMkSjjrqKFq0iDJbuchuqhpEoouSIK4AfghcF27/Fbg3YxFFoO4laSpVDSJN12CCMLO+BFN+v+Dud2QnpMbpEldpClUNIs2Tso/GzH4KjCfoUppsZslWlouFKgiJQqu8ieydhjrxzweOd/dzCOZiujI7ITVOCUIas27dOioqKujWrRuDBw9Wl5JIMzSUID5z9y0A7v5RI21TMrOhZlZlZovN7PoG2g03sxozO7Gh47m7upgkJVUNIunT0BhEj4S1qA04MnFtanc/u7GDm1kL4D7gVOBDYJaZTXD3qnrt9iUYCJ/e2DG3bt3K559/TseOsa9XJDlGYw0i6dVQgqh/m/J9zTj+QGCJuy8HMLMxwDCgql67W4Db2X2lVErbtm2jpKSkGaFIodIVSiKZ0dCCQa+l4fhdgRUJ2ysJksYuZnYCcKi7/9nMGk0QW7dupV27dmkITQqBqgaRzIlyH0TGmJkBvwEuStzd0GtUQQioahDJhkwniFUEK9HVOjTcV6sDcBxQGSaLzsAEM/uGu8+pf7BRo0axZs0aNm7cSGVlJWVlZRkMXXKVqgaR1CorK6msrEzLsRpdcnRXQ7M27v5Zkw5u1hJYRDBIvRqYCZzn7gtTtP8L8F/uvsdUHrVLjr711ltce+21TJs2rSmhSAFIrBpKS0tVNYhEsDdLjjZ66aqZDTSzBcCScLufmUWaasPddwJXA5OAd4Ex4WR/N5lZsjWtnQhdTBqDKD66r0Ek+6J0Md0DnElwVzXuPs/MTo76Bu5eARxTb9/IFG1Paex4GqQuLhprEIlPlATRwt2XB0MEu+zMUDyN0iB18dBYg0i8oiSIFWY2EPBwTOEaYHFmw0pNXUyFT1WDSG6IkiCuJOhmOhxYC7xKjPMybd26VRVEAVPVIJI7Gk0Q7v5PYEQWYolEFURhUtUgknsaTRBm9jDB1UV1uPvlGYmoERqkLjyqGkRyU5QuplcTHrcF/o2602dklQapC4eqBpHcFqWL6ZnEbTN7ApiasYgasW3bNs3kWgBUNYjkvuZMtdEdiO0vWoPU+U1Vg0j+iDIG8TG7xyBaABuAGzIZVEM0SJ2/VDWI5JcGE0Q4gV4/dk+wV+NRJ2/KECWI/KOqQSQ/NZgg3N3N7BV375OtgBqjLqb8oqpBJH9FGYOYa2YnJJthNQ6qIPKDqgaR/JcyQZjZPu6+AziBYC3pZcAWgtlW3d1PzFKMdaiCyH2qGkQKQ0MVxEzgROAbWYolElUQuUtVg0hhaShBGIC7L8tSLJEoQeQmVQ0ihaehBPEFM/uvVE+6+28yEE+j1MWUW1Q1iBSuhhJES2BfGlnhLdtUQeQOVQ0ihS3lmtRmNieugehkatekLikp4aOPPqJ9+/Zxh1S0tDa0SP7YmzWpGx2DyCXuTnV1tSqIGKlqECkeDVUQHd19Q5bjScnMfNu2bey///589tlncYdTdFQ1iOSnjFQQuZQcammAOh6qGkSKU3Nmc42NBqizS1coiRS3vEoQqiCyR1WDiORVglAFkXmqGkSklhKE7KKqQUQS5VWCUBdTZqhqEJFk8ipBqIJIP1UNIpJKXiUIVRDpo6pBRBqTVwlCFUR6qGoQkSiUIIqIqgYRaYq8ShDqYmo+VQ0i0lR5lSBUQTSdqgYRaa68SxCqIKJT1SAieyOvEsTWrVv1QReBqgYRSYe8ShDqYmqcqgYRSZe8ShAapE5NVYOIpFuLTL+BmQ01syozW2xm1yd5/kdm9q6ZzTWzyWZ2WKpjqYJIbt26dVRUVNCtWzcGDx6s5CAiaZHRCsLMWgD3AacCHwKzzGyCu1clNJsDDHD3ajO7ArgTGJHseBqkrktVg4hkUqa7mAYCS9x9OYCZjQGGAbsShLu/kdB+OnB+qoNt3bpVFURIYw0ikmmZThBdgRUJ2ysJkkYqlwF/TvWkuphUNYhI9uTMILWZXQAMAL6Wqs2yZct44okneO211ygrK6OsrCxr8eUCVQ0i0pjKykoqKyvTcixz97QcKOnBzQYBo9x9aLh9A+Du/qt67U4Dfguc5O7rUxzLe/Xqxbhx4+jdu3fGYs5FiVVDaWmpqgYRiczMcHdrzmszXUHMAo4ysyOA1QSDz+clNjCzE4CHgPJUyaFWMQ5Sq2oQkbhkNEG4+04zuxqYRHBJ7aPuvtDMbgJmufvLwB1Ae2CsmRmw3N2/mex4xTRIrbEGEYlbRruY0snMfN9992XVqlXst99+cYeTUaoaRCRd9qaLKa8SRIsWLaiurqZVq1Zxh5MRGmsQkXTL5TGItGrRokXBJgdVDSKSa/IqQRTiALXGGkQkV+VVgii0AWpVDSKSy5QgYqCqQUTyQV4liELoYlLVICL5Iq8SRD5XEKoaRCTf5FWCyNcKQlWDiOSjvEoQ+VZBqGoQkXymBJEhqhpEJN/lVYLIhy4mVQ0iUijyKkHkegWhqkFECkleJYhcrSBUNYhIIcqrBJGLFYSqBhEpVEoQzaSqQUQKXV4liFzpYlLVICLFIK8SRNwVhKoGESkmeZUg4qwgVDWISLHJqwQRRwWhqkFEipUSRANUNYhIMcurBJGtLiauHzdkAAAJBElEQVRVDSIieZYgslFBqGoQEQnkVYLIZAWhqkFEpK68ShCZqiBUNYiI7KmoE4SqBhGR1PIqQaSzi0lVg4hIw/IqQaSjglDVICISTV4liL2tIFQ1iIhEZ+4edwyRmJnX1NRgZk1+bWLVUFpaqqpBRIqGmeHuTf/gJM8qiOYkB1UNIiLNk1cVRFNiVdUgIlJEFURUqhpERPZeQVUQqhpEROpSBYGqBhGRdMv7CkJVg4hIantTQbRIdzD1mdlQM6sys8Vmdn2S51ub2RgzW2Jm08zs8KjHXrduHRUVFXTr1o3BgwcrOYiIpFFGE4SZtQDuA8qB44DzzKxXvWaXARvcvSdwN3BHY8etqalhxowZLF68mPLy8qLrUqqsrIw7hJyhc7GbzsVuOhfpkekKYiCwxN2Xu/t2YAwwrF6bYcCfwsfPAac2dEBVDfrlT6RzsZvOxW46F+mR6UHqrsCKhO2VBEkjaRt332lmn5hZR3ffUP9gM2bM0BxKIiJZkotXMaUcTOnWrVvRdSeJiMQlo1cxmdkgYJS7Dw23bwDc3X+V0ObPYZsZZtYSWO3uByc5Vn5cbiUikmNy9T6IWcBRZnYEsBoYAZxXr81LwEXADOAc4PVkB2ruf1BERJonowkiHFO4GphEMCD+qLsvNLObgFnu/jLwKPCEmS0B1hMkERERiVne3CgnIiLZlfEb5ZoqkzfW5ZsI5+JHZvaumc01s8lmdlgccWZDY+ciod1wM6sxsxOzGV82RTkXZvbt8HdjgZmNznaM2RLhb+QwM3vdzOaEfydnxBFnppnZo2a21szmN9DmnvBzc66Z9Y90YHfPmR+ChLUUOAJoBcwFetVrcyXwQPj4XGBM3HHHeC6+BrQNH19RzOcibLcv8AbwFnBi3HHH+HtxFDAb2C/c7hR33DGei98BPwgfHwu8F3fcGToXXwX6A/NTPH8G8H/h41JgepTj5loFkfYb6/JYo+fC3d9w9+pwczrBPSWFKMrvBcAtwO3AZ9kMLsuinIvvA/e7+6cA7r4uyzFmS5RzUQPsFz4+AFiVxfiyxt2nAh830GQY8HjYdgawv5k1es9AriWIZDfW1f/Qq3NjHfCJmXXMTnhZFeVcJLoM+HNGI4pPo+fCzE4ADnX3Qj0HtaL8XhwNHGNmU83sLTMrz1p02RXlXNwEXGhmK4CXgWuyFFuuqX+uVhHhC2Uu3ijXVEV/+auZXQAMIOhyKjoWrEX7G4LLpXftjimcXLAPQTfTScDhwBQz61NbURSZ84DH3P2u8L6s0QTzwkkEuVZBrCL4ha51KHuWhCuBwwDCG+v28yTTchSAKOcCMzsN+AlwVlhmF6LGzkUHgj/6SjN7DxgETCjQgeqofyMvunuNu78PLAZ6Zie8rIpyLi4DngVw9+lAWzPrlJ3wcsoqws/NUNLPk/pyLUHsurHOzFoT3BPxYr02tTfWQQM31hWARs9F2K3yEPANd18fQ4zZ0uC5cPdP3f1gd+/h7t0JxmPOcvc5McWbSVH+RsYDJwOEH4Y9gX9kNcrsiHIulgOnAZjZsUCbAh6TMVJXzi8C34VdM1x84u5rGztgTnUxuW6s2yXiubgDaA+MDbtZlrv7N+OLOjMinos6L6FAu5iinAt3n2hmQ8zsXWAH8GN3b2gAMy9F/L34MfCwmf2IYMD6otRHzF9m9hRQBhxkZh8AI4HWBFMb/d7dXzGzr5vZUmALcEmk44aXPYmIiNSRa11MIiKSI5QgREQkKSUIERFJSglCRESSUoIQEZGklCBERCQpJQjJGWa2M5yW+e3w35RTuYc3Ry1Iw3v+JZwueq6Z/dXMmnzHsZn9IJzuBDO7yMw6Jzz3ezPrleY4Z5jZ8RFe8x9m1nZv31uKlxKE5JIt7n6iu58Q/vtBI+3TdRPPee7en2C2y1839cXu/jt3r11z4WISJkFz98vdvSotUe6O80GixfmfQEma3luKkBKE5JI97n4OK4UpZva38GdQkja9w2/VtYvCHBnuPz9h/4Ph3eYNve8UoPa1p4avm2dmj5hZq3D/7Wb2Tvg+d4T7RprZtWY2HPgSMDp8bdvwm/+JYZVxR0LMF5nZPc2McxrQJeFYD5jZTAsWBxoZ7rsmbPMXM3st3DcknN31b2b2jJkpeUiDlCAkl7RL6GIaF+5bC5zm7l8imFbl3iSvuwK4291PJPiAXhl265wLDA731wDnN/L+3wAWmFkb4DHgHHfvR7AYzZUWTCv/TXfvE36T/2XCa93dxwF/A74TVkDVCc+PA/4tYftcYEwz4xxKMN9SrZ+6+0CgH1AWztx6L8FkbGXufqqZHQT8DDg1PJezgWsbeR8pcjk1F5MUva3hh2Si1sB9FiyRuJPks5JOA35mwZKrz7v7UjM7FTgRmBV+I29LkGySedLMtgHvE6wXcAzwD3dfFj7/J+Aq4H5gm5k9AvwfwfoCyexRAbj7OjNbZmYDCVZBO8bd3zKzf29inG0I5t9KXDJyhJl9n+DvuTPQG3iHupO3DQr3vxm+TyuC8yaSkhKE5LofAWvc/XgLpnffVr+Buz9tZtOBM4H/M7MfEHww/sndfxbhPb7j7m/XboTftpN9yO8MP+BPJZhJ+GqatqLhMwTVQhXwQu3bNTXOsKvqPmC4mXUjqAQGuPunZvYYQZKpz4BJ7t5YdSKyi7qYJJck63vfH1gdPv4u0HKPF5l1d/f3wm6VF4HjgdeAb5nZF8I2BzZwVVT9910EHGFmPcLtC4E3wj77A9y9Aviv8H3q28TuJS7re4Fg6ccRBMtj0sw4bwRKzezo8L02A5ssWELyjIT2nybEMh34SsL4TElzrtiS4qIEIbkk2VVJDwAXm9nbBEtpbknS5tvhwPHbBAsHPe7uC4GfA5PMbB7BlNCdk7x2j/d0988IpkN+LnztToJ1N/YDXg73TSGobur7I/BQ7SB14vHd/RNgIXC4u/8t3NfkOMOxjf8F/tvd5wNzw+OOBqYmvOZhoMLMXgvXQLgEeDp8n7cIutJEUtJ03yIikpQqCBERSUoJQkREklKCEBGRpJQgREQkKSUIERFJSglCRESSUoIQEZGklCBERCSp/w/ZriqVKfCTuwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f8f9e5a00f0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(xROC,yROC,color='k')\n",
    "plt.plot([0,1],[0,1], lw=0.5, color='grey')\n",
    "plt.xlabel(\"False Positive Rate\")\n",
    "plt.ylabel(\"True Positive Rate\")\n",
    "plt.title(\"ROC Curve\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Precision recall curve"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "xPR = []\n",
    "yPR = []\n",
    "\n",
    "for i in range(1,len(probY)+1):\n",
    "    preds = [x[1] for x in probY[:i]]\n",
    "    labs = [x[2] for x in probY[:i]]\n",
    "    prec = sum(labs) / len(labs)\n",
    "    rec = sum(labs) / sum(y[9000:])\n",
    "    xPR.append(rec)\n",
    "    yPR.append(prec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEZCAYAAACNebLAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VdW5//HPE0hAhiQMYQgJs6C2DlilUG5t1IpYW6lWWmkdUKzUqlz9tb+q3P6YqrVYr3bQWlutl4tXqahV0HurrRCVIlwqgihhUCHMM0kERDA8vz/OTjiEQ3JMcs4+Ofm+X6/9yp6y9rM34Txnrb3X2ubuiIiI1JQRdgAiIpKalCBERCQmJQgREYlJCUJERGJSghARkZiUIEREJCYlCBERiUkJQpoEM3vXzM6pY59CM6swM0tWXIlkZl8xsw1Ry2vN7LwwY5LmRQlCGsTM1pnZ/uCDeYuZPW5mbRr7OO7+eXd/vY59Nrh7tjdS708z6171AV3jPDcn6jxjiPtczGywmb1kZnvMbKeZLTSzMQmMTdKcEoQ0lAMXu3s2cCZwFvDTWDs2wW/2XwP+J5iPPs8zgEHAnWEFVpOZDQVeBeYB/dy9M3AjMKKe5emzQZQgpFEYgLtvIfKB+nkAM5tnZneZ2Xwz2wf0MbNsM3ss+Ba+wcx+Fp04zOz7ZrYi+Kb+rpmdEayvbl4xs7PNbLGZlQe1lvuC9b3M7HDVh1tQA3jBzHaZ2Wozuz7qOJPM7M9mNj041nIzO7PGeX0N+O8Y57kdeJlIoqgqL8vM7jOz0iCm35lZq6jtI83s7SDmNWY2PFg/Jup83zezG+r5b3Av8Li73+fuu4M433b3K4LjXGNmb0T/QnCt+gbzjwcxv2RmHwE/Ds4j+t/mUjNbFsybmd0RxLzDzGaaWW49Y5cUpQQhjcbMCol8qC6JWn0lcD3QHlgPTAc+AfoS+RZ+QbAdMxsFTASuDL6pXwLsinGoXwO/cvccoB/wdNS26CaZPwfH7AaMAn5uZkVR278BPAnkAHOAh6LOpSVwDvBKjPMsAC4C1kStngb0B04LfvYIzgUzGxyc94+CmM8B1gW/tw34WnC+1wIPVCXFeJnZCcBQ4Nk6dq3ZXFVzeTTwM3dvT+Qa7wXOq7H9iWB+PJF/ny8D+cAe4HefJW5pAtxdk6Z6T8BaoALYHcz/FmgVbJsHTI7atwtwoGp7sO4K4NVg/q/ALbUc57xgvhiYBHSqsU8voJLIF59C4BDQJmr7z4E/BfOTgFeitp0M7ItaPg/4W4zzrAAOA38DsqO27wX6RC0PBT4M5n8P/Huc1/MvVdcA+AqwPtY1qPE7+UFMA2op9xrg9RrrDgN9g/nHgf+osf1nwGPBfPvgHAuC5RXAuVH7dgcOAhlh/01qarxJNQhpDCPdvaO793H3W9z9k6htG6LmewGZwBYz221me4h8eOYF2wuBD+I43lhgILDSzBaZ2cUx9ukO7Hb3/VHrSol8s6+yNWp+P9A6qu29ZvMSRM4zm8gH90lAZwAzywPaAG8F57WbSFNbp7rOy8wuMrM3g2awPURqJp1rOfdY9hD5sO/+GX+vpg01lp8ELjWzTOAy4C133xhs6wX8Jep8VxBJyF0bGIOkkJZhByBpobabz9HNGBuI1CA6efC1s4YNRJqMauXuHwDfBTCzbwHPmFnHGrttBjqaWVt33xes6wlsqqv8wNeAS2usq7oH8YaZTQf+PdhnJ5EE8zmP3IepKeZ5mVkW8AyRZrgX3P2wmf2F2q/nMdz9YzN7E/gW8NpxdttHJIlVHbtbrKJqlFtiZqVErsVoIgmjynrgOnd/87PEKk2LahCSNO6+lUib/gNm1j640dnXjvRveJTIzdEzAcysX3Bf4yhm9j0zq/qWXU7kg+1w1ebgWBuBBcA9ZtbKzE4jUvOYUUuIFpTfB8hy91W17Psr4AIzOzVIdn8EfhXUJjCzHlU3ooHHgGvN7NzgnPPNbACQFUw7g+RwETA8xrHi8RNgjJn9qCpZmtnpZvZUsH0Z8DkzOy24eT6J+B6hfRL4VyL3GmZFrX+EyD2dnsGx8szsknrGLilKCUIaqrYPmVjbribyobiCyH2LWURuIuPuzwB3A0+aWQWR9viqmkF0WSOA94J9HgC+E9WsFb3faKAPkdrEs8D/c/d5ccQbq3mp5rfrnURuPE8MVt0BvA8sNLMyIolwQLDvYiI3oH9FJKEVA73cfS+Rm72zgmaaK4AX4ojv2A2Rb/LnAecDH5jZTiLNdy8F29cAU4k8CrsaeOM4RdU0k8hN9Vc9eDoq8Osg1lfMrJxIMh4cZ5nSRFjsmr5I82VmLwG/dfe/hh2LSJhUgxA51rxgEmnWVIMQEZGYElqDsEiP2W1m9k4t+/wm6Fm69LN2EBIRkcRJ9GOujxPpOPWfsTYGT230c/cTzeyLRG6qDTnOvqrqiIjUg7vXaxy0hNYg3H0+kU48xzOSIHm4+yIgx8yO29Em7F6FqTJNmjQp9BhSZdK10LXQtah9aoiwO8r14Ojem5uCddti7fz667WO9px0p59+Ojk5OWGHISKSEGEniM/kqquuqp7PyckhNze8wSPXrl3LTTfdxB133BFaDCIiNRUXF1NcXNwoZYWdIDYRGaemSgG1DIVQWlqa8IDi9W//9m9UVlaGcuyioqJQjpuKdC2O0LU4ojlfi6KioqPOf8qUKfUuKxn9IIzjjy0zm0jPWsxsCFDm7jGbl+SI5vzHX5OuxRG6FkfoWjSOhNYgzOxJoAjoZGbriYz/kgW4u//B3f/bzL5mZu8TGUzs2kTGk8oOHjxIWVkZZWVl7Nmzp3q+5nTVVVfxpS99KexwRaQZSGiCcPfvxrHPzYmMIUyVlZVs27aNDRs2sGHDBrZu3cr27durpx07dlTP7927l9zc3OqpQ4cORy3n5uby1ltvMW/evIQmCHfn4MGDtGrVqu6dRSSthX0PoskrLy9n9erVrFq1itWrV/P+++9XJ4QtW7aQm5tLz549KSgoID8/n7y8PE477TS6dOlSPeXl5dGhQwcyMmpv8du3b1+t26N98skn7Nq1i927d9f6s6q2Ul5eXj0dPnyY1atX079//4ZenmNUVlZSUVFBeXk5FRUVVFRUsHfv3urpo48+Yu/evYwcOZJTTjml0Y8vIvFTgqinrKwspk6dyj333MOAAQMYOHAgAwYMYMSIEfTs2ZPCwkIKCgoa/Zv4rl27WLRoEVu3bj3utH37dg4dOkSnTp3o2LFj9c/o+X79+tGxY8fqmkrVU2HZ2dmcffbZx01Ghw4dYs+ePezatat62r17d3VyqfrwP978gQMHaN++PdnZ2eTk5NC+fXvat29Pu3btqqfFixfj7koQIiFrMmMxmZmnUqz79+9nz5495OfnE/Ve94R6+OGH+fnPf0737t3p1q3bcae8vDzatWtX77hOP/10Tj75ZDIyMqprGlXTvn376NChA506daqeOnbsWJ1ccnJyyMnJqZ6vuS6euCZMmMCcOXMoLCykrKyMIUOGcP/999frXCByf6eqtlJRUUHv3r1DfURaJJnMDK9nT2olCDnGc889x8aNG4+qgVRN2dnZdTaFNdTatWtZsGABubm5lJaWMn36dF588cWYN+/jWVdZWVmdoPbv38+1117LPffcc9Qx3Z2PP/74qOa2yspKhg0bltBzFUk0JQhJW++++y5nn3027dq1q/Um/vGWc3JyaN26dXWt5b777uPhhx+mV69ex9x7yczMrK7t5OTksGTJEjZv3kxeXl4dUYqkLiUIkTht27aNBQsWHJUIqqasrKyj9i0oKKjuDPnBBx/Qpk2bWEWKpDQlCJEE2LJlC59++imf//znWbduHR06dAg7JJHPrCEJQk8xiRxH9+7dAcjIyGDZsmUcOnSInTt3HnXTvmq5rKyMRx99lFNPPTXkqEUajxKESB0GDRrErbfeSufOnY+6Yd+3b18GDx5Mp06dmDp1KuvXr1eCkLSiJiaRRnDxxRdzzjnnMGDAAHbs2MGOHTvIycnh5pvTdqAAaSLUxCQSskGDBvH8889X94w/4YQT+OUvf0nHjh257LLLaN26ddghinxmqkGIJMCBAwe48sormTdvHtOmTePSSy+lU6dOYYclzZCeYhJJUePGjeOFF17g29/+NjfddNMxw6Js2bKFrVu38qMf/YgLLrig+vcqKyvZtWsXW7duZdu2bWzdupWBAwcyePDg4x7rk08+0SCLcgwlCJEU9vjjjzN+/PjjDpEye/ZsSktL6dq1a3VC2LlzJ7m5uXTt2pVu3brx8ccfc/DgQcaMGcO2bduOmrZv3862bdvYt28f8+bNY9iwYWRmZoZ92pIilCBEmrCSkhIWLlxIt27dqhNCXl7eUR/yJSUlTJw4kc6dO9O1a9fqqUuXLtXzV199NS+++CI33ngjN9xwA4WFhRpzSpQgRCQyntTMmTO55ZZbAPj+979/zJhT0vwoQYjIUe69917+8Y9/8PzzzydttGFJTQ1JEMl4J7WIJFm/fv2YPXs2zz//fNihSBOmGoRImrrkkkuYM2cOM2fOJDc3lwsvvDDskCQEamISkWPs2bOHyy+/vHosqauvvpqNGzeye/duZs2aRU5OTtghShIoQYjIce3du5epU6eSl5dHQUEBt912GwsWLKBv375hhyZJoAQhInHr27cvf//738nLy8Pdyc7ODjskSSDdpBaRuGVmZnL66afTqVMnzj33XJ555hnuv/9+1q1bF3ZokmJUgxBpZrZu3UpmZibl5eV873vfo3v37pSUlNC+fXuys7OZMmWK3sWdRtTEJCINsmTJEt5++22eeuopvvvd73LdddeFHZI0EiUIEWkUP/jBD3jkkUdYsGABGzduZO3ataxdu5Z169bh7jz11FOsW7euehoyZAhDhw4NO2yphRKEiDSKTz/9lC984Qu0aNGCPn360Lt3b/r06UNhYSFXXHEFmZmZ1et37drFaaedxkMPPURZWdlRiWPdunV069aNO++8M+xTavZSOkGY2QjgV0RuiD/m7tNqbO8J/AnIA3YBV7r75hjlKEGIhOjgwYNkZmZWD93xyCOP8JOf/ASIjANVlTh69+5N27Ztueeee/jlL3/JqFGj6NWrV5ihN2spmyDMLANYDZwPbAYWA1e4+8qofZ4GZrv7E2ZWBFzn7lfHKEsJQiSF7Nu3j9WrV9O7d29yc3OPGvPJ3ZkwYQKPPfYYO3bsYOXKlQwcOJDKykoqKyvJysoKMfLmJZUTxBBgkrtfFCzfAXh0LcLM3gUudPdNwXK5ux/TxVMJQqTpKSsro6CggM6dO9OyZUvWr1/Pueeey8svvxx2aM1GKveD6AFsiFreGKyLthS4DMDMLgPamVmHBMclIkmQm5tLSUkJDz/8MC+++CIvvfQS7777LmPGjKGoqIji4uKwQ5RatAw7AOD/Ag+a2RjgdWATUBlqRCLSaAoLCyksLASga9eujBs3joKCArZt28aqVasoKioKN0A5rkQniE1Az6jlgmBdNXffAnwLwMzaAt9y94pYhU2ePLl6vqioSH9YIk1Mhw4dmDhxIgCLFi0KOZr0VFxc3Gg1s0Tfg2gBrCJyk3oL8L/AaHcvidqnE7Db3d3M7gI+dffJMcrSPQiRNDJu3DgGDRrEqFGjyM3NpUWLFmGHlJZS9h6Eu1cCNwOvAO8BM929xMymmNnXg92KgFVmthLoAtydyJhEJDW0atWKm2++mc6dO/PNb34TfQFMPeooJyKh2L9/PwcOHGDevHlcfvnlTJ48mUmTJoUdVtpJ2cdcG5MShEj6mjBhAosWLeLVV18NO5S0k7JNTCIi8Rg6dChz585lzZo1HD58OOxwJKAahIiEbvv27RQWFnLw4EE6dOjATTfdRElJCbfddhsAq1at4sMPP2T8+PF06dIl5GibFjUxiUiT99FHH7F8+XIeeOABTjnlFH7zm99QVlbGF7/4RQYMGMCMGTMAKC0tpWfPnnWUJlWUIEQk7VT9f68a42nbtm1069aNcePGceedd7Jy5UoyMjK44IIL2LdvH61ataJly1To+5talCBEpFn405/+xNixY+nRowc5OTmsWLGCwsJCNmzYQO/evXnvvfdo06ZN2GGmFCUIEWkWDh8+zP79+2nXrh3uzvz586uH7Rg6dCgPPfQQmZmZjBkzhszMzLDDTQlKECLS7F1zzTVs3LiRuXPnUlxczFe+8pWwQ0oJShAiIoGBAweye/duduzYEXYoKUH9IEREAjNnzqSgoCDsMNKCEoSIpKU9e/awYMEC9uzZE3YoTZYShIiklVatWrF06VJ69erFsGHD6NixI927d+fpp58OO7QmR/cgRCStuDs7d+6kc+fOlJaWsnr1ambMmMHcuXMZOXIkv/vd78IOMal0k1pEpBbr169n1qxZPPfcc0yfPp3+/fuHHVLSKEGIiNThrbfe4pxzzuHAgQOUl5fTrl27sENKCiUIEZE4uDu5ubmsX7+enJycsMNJCj3mKiISBzOjoqKCKVOmhB1Kk6AEISLNyl133cXvf/978vPzWbFiRdjhpDQ1MYlIs1JRUcHs2bO56qqrAFiwYAFDhw4NOarEUROTiEicsrOzufLKK/n4448ZMGAAH374YdghpSwNni4izVLr1q0566yzOHz4MB988AHLli1jxYoVXHfddeTn54cdXkpQghCRZisrK4trr72W/Px8Tj/9dN59911OPfVURo4cGXZoKUH3IESk2SovL6eyspKOHTsCMHz4cP72t7/x05/+lBtvvDEtahK6ByEiUg85OTnVyQFgxowZXH755cyYMYNly5aFGFlqUIIQEQl07dqVWbNmcfLJJ4cdSkpQghARkZiUIEREaigvL+c73/kOmzdvDjuUUClBiIjUMG3aND766CN69OjB9ddfzxtvvBF2SKFIeIIwsxFmttLMVpvZ7TG2F5rZXDNbYmZLzeyiRMckIlKbL3/5y1RUVDB27FiWLFnCzJkzww4pFAlNEGaWATwIXAh8DhhtZifV2O2nwJ/d/UxgNNC83uYhIimpffv2PProo4wdOzbsUEKT6BrEYGCNu5e6+yFgJlCzB8phIDuYzwU2JTgmEZHP5J133uHRRx8NO4ykS3SC6AFsiFreGKyLNgW4ysw2AC8CtyQ4JhGRuJ1xxhl07NiR8ePHc8kll9CjRw/uvffesMNKilQYamM08Li7P2BmQ4AniDRHHWPy5MnV80VFRRQVFSUjPhFpxoYNG8azzz7LfffdR//+/TnttNO4/fbbmT59OpMnT2bUqFFhh3iU4uJiiouLG6WshA61EXzgT3b3EcHyHYC7+7Sofd4FLnT3TcHyB8AX3X1njbI01IaIhM7dmTNnDtOnT+eUU07hZz/7Wdgh1SqVh9pYDPQ3s15mlgVcAcyusU8p8FUAMzsZaFUzOYiIpAoz45JLLmHQoEHcddddbN26NeyQEiahCcLdK4GbgVeA94CZ7l5iZlPM7OvBbj8Gvm9mS4H/Aq5JZEwiIo3hhz/8IQDdu3dn9OjRlJeXhxxR49NoriIi9VRaWsof/vAH/vjHPzJ//nx69OhB27Ztww7rKA1pYlKCEBFpoJNOOoldu3axc+dO3nzzTYYMGRJ2SNWUIEREQrR8+XLatGnDeeedR2VlJRs3bgw7pGqpfJNaRCTtnXrqqfTr14+nn36aHj1qdvVqupQgREQkJiUIERGJSQlCRERiUoIQEZGYlCBERCQmJQgRkUa0ceNG9u/fTzo8lq8EISLSSDp27MjmzZtp27YtGRkZLFiwIOyQGiTujnJm1gPoRdQQ4e7+eoLiinV8dZQTkZS3c+dONmzYwMUXX8yWLVv46KOPaNeuXWjxJLwntZlNA74DrAAqg9Xu7pfU56D1oQQhIk1JeXk5ubm5bN++nby8vNDiaEiCiPeFQd8EBrr7J/U5iIhIc5OTk0Pnzp3DDqNB4r0H8SGQmchAREQktcRbg9gPLDWzV4HqWoS7j09IVCIiErp4E8Rsjn0TnIiIpLG4mpjcfTrwFPBWMD0ZrBMRkVo8+OCDFBUV8eqrr3L48OGww/lM4qpBmFkRMB1YBxhQaGbXJPMxVxGRpuYb3/gGu3fv5rXXXuO1114D4NChQ7RsGW/jTbjifcz1LeC77r4qWB4APOXuX0hwfNEx6DFXEWmy1q1bR58+fdi/fz8nnHBC0o6bjBcGZVYlBwB3X42eahIRiVvv3r0BKCgoYMCAAbz+euo3wMSbIP5pZo+aWVEw/RH4ZyIDExFJN3PmzOGGG25gzZo1nHfeeZxyyilhh1SreJuYWgE3Af8SrHoD+F0yO86piUlE0kVFRQXz58/n4osvZtGiRQwePDhhx0r4UBupQAlCRNKJu9OtWze2b9/OO++8w6mnnpqQ4yTsHoSZPR38XG5m79Sc6nNAERGJfHCvWrWKli1b8uSTT4YdTky11iDMrLu7bzGzXrG2u3tpwiI7NhbVIEQk7UyYMIF77rknYe+PSMZorm2Bj939cPCI60nA/7j7ofoctD6UIEQkHR04cIATTjiBBQsWMHTo0EYvPxmPub4OtA7eCfEKcBXwH/U5oIiIHJGVlUW3bt340pe+hJnRv39/li1bFnZYQPwJwtx9P3AZkaeXRgGfS1xYIiLNQ0ZGBqtXr2bhwoU8++yz5OTkMGHCBJ599tmwQ4u7ielt4IfAA8BYd3/PzJa7e5233c1sBPArIsnoMXefVmP7/cC5gANtgTx37xijHDUxiUjamz59OjNnzmTTpk3885//JCsrq0HlJeMexFeAHwH/cPdpZtYXuLWu4b7NLANYDZwPbAYWA1e4+8rj7H8zcIa7Xx9jmxKEiDQLL7/8MiNGjOCcc86pHsOpvlK2H4SZDQEmuftFwfIdRF5VOu04+/8DmOjur8bYpgQhIs3Gk08+yfe+970GP92UyH4Qvwp+zjGz2TWnOMrvAWyIWt4YrIt1rJ5Ab2BuXJGLiKSx4cOHAzB16tTQYqhrzNkZwc/7Eh0IcAXwTG3VhMmTJ1fPFxUVUVRUlPioRERC0LlzZ+644w527979mX6vuLiY4uLiRonhM/eDCJZbAK2CJ5tq+70hwGR3HxEsH7eJycyWAD9094XHKUtNTCLSrPz2t79l/PjxXHrppTz33HP1KiMZ/SBeBdpELZ8A/D2O31sM9DezXmaWRaSWcEzTlJmdBOQeLzmIiDRHY8eO5e677+Yvf/kLS5cuTfob6eJNEK3dfW/VQjDfppb9q/arBG4m0rnuPWCmu5eY2RQz+3rUrt8BZsYftohI+mvTpg1jx46lZcuWnHXWWSxfvjypx4+3iekfwC3uviRY/gLwoLs3fr/w48egJiYRaZYqKyvJz89n+PDhzJgxo+5fiJKMJqZbgVlm9oaZzQf+TKRmICIiCdaiRQvGjx/PE088wdq1a5N23Lj7QZhZJjAwWFyVzIH6guOrBiEizVZZWRkdOnRgwoQJ3H333XH/XsJrEGbWBrgd+Fd3fxfoXeMegoiIJFBubi5Tp04lIyPehp+Gi/dIjwMHgap7DpuAuxISkYiIxOTu3HXXXWzfvj0px4s3QfRz93uBQwBB/4d6VVlERKR+rr8+MkxdqiWIg2Z2ApERVzGzfsAnCYtKRESOkZ+fz+c+l7w3LcSbICYBfwUKzey/iHSc+0nCohIRkZjKyspYvnw5lZWVCT9WnU8xmZkBBcB+YAiRpqWF7r4z4dEdHYeeYhKRZu/LX/4y8+fPB2Dr1q107dq11v2T8T6IuF4OlEhKECIiEeXl5fTo0YMlS5YwYMCAWvdNRke5JWZ2dn0OICIijSsnJ4f8/PyEH6eu4b6rfBG40szWAfuINDO5u5+WqMBERCRc8SaICxMahYiIpJxaE4SZtQZ+APQHlgOPufunyQhMRETCVdc9iOnAWUSSw0XAvyc8IhERSQl1NTGdUvX0kpk9Bvxv4kMSEZFUUFcNonrEVjUtiYg0L3XVIE43s4pg3oATguWqp5iyExqdiIiEptYE4e4tkhWIiIikluQNLC4iIk2KEoSIiMSkBCEiIjEpQYiISExKECIiEpMShIiIxKQEISLSBK1Zs4aJEyeSyPfkKEGIiDRBv/71r/nzn/9MRkYGu3btSsgx4nqjXCrQG+VERI62fft2evXqRUlJCb179465TzLeKFdvZjbCzFaa2Wozu/04+3zbzN4zs+Vm9kSiYxIRSQddunSp853UDRHvC4PqxcwygAeB84HNwGIze8HdV0bt0x+4HRjq7hVm1jmRMYmISHwSXYMYDKxx91J3PwTMBEbW2Of7wEPuXgHg7jsTHJOIiMQh0QmiB7AhanljsC7aAGCgmc03swVmptebioikgIQ2McWpJZFXmp4D9AReN7PPV9UoREQkHIlOEJuIfOhXKQjWRdsILHT3w8A6M1sNnAi8VbOwyZMnV88XFRVRVFTUyOGKiDQ90U94FhcXU1xc3CjlJvQxVzNrAawicpN6C5FXlo5295KofS4M1o0JblC/BZzh7ntqlKXHXEVEajCLPMFaUlLCSSedFHN7Sj7m6u6VwM3AK8B7wEx3LzGzKWb29WCfl4FdZvYe8Crw45rJQUREYlu5MvJQ6Mknn9zoHebUUU5EpIk7fPgw3bt35+233yY/P/+obSlbgxARkcTLyMigRYvGf0O0EoSIiMSkBCEiIjEpQYiIpIEtW7YwZcqURi1TCUJEJA1MnDiRN954o1HLVIIQEUkDX/3qV+nUqVOjlqkEISIiMSlBiIikifnz57NpU83RjOpPCUJEJA306tULgBNPPJGDBw82SplKECIiaaBnz54sWrSIjz/+mFGjRjVKmUoQIiJpYvDgwTz88MNs3769UcpTghARSSMnnngibdq0aZSylCBERCQmJQgREYlJCUJERGJSghARSSMZGRnMnTuXv/71rw0vqxHiERGRFDFs2DDOOOMMSktLG1yWEoSISBrJysri7LPPrn5XdUMoQYiISExKECIiaebgwYOMGzeOTz75pEHlmLs3UkiJZWbeVGIVEQnTunXr6NOnD2VlZeTm5uLu9WpvUg1CRCTN9O7dG4Bf/OIXDSpHCUJEJA3deuutFBcXN6gMJQg1DnlXAAAH/ElEQVQRkTQ0fPhwcnNzG1SGEoSISBrKzMxscGc5JQgRkTR07rnncv755zeoDCUIEZE01KJFCwoKChpUhhKEiIjElPAEYWYjzGylma02s9tjbL/GzLab2ZJgui7RMYmISN1aJrJwM8sAHgTOBzYDi83sBXdfWWPXme4+vq7yGvrIlohIc9KnT58G/X5Ce1Kb2RBgkrtfFCzfAbi7T4va5xrgLHe/pY6y1JNaROQzMrOU7UndA9gQtbwxWFfTZWa21MyeNrOG3VUREZFGkdAmpjjNBp5090NmdgMwnUiT1DEmT55cPV9UVERRUVEy4hMRaTKKi4sbrTk+GU1Mk919RLB8TBNTjf0zgN3ufkz3PzUxiYh8dqncxLQY6G9mvcwsC7iCSI2hmpl1i1ocCaxIcEwiIhKHhDYxuXulmd0MvEIkGT3m7iVmNgVY7O4vAuPN7BLgELAbGJPImEREJD56H4SISBpL5SYmERFpopQgREQkJiUIERGJSQlCRERiUoIQEZGYlCBERCQmJQgREYlJCUJERGJSghARkZiUIEREJCYlCBERiUkJQkREYlKCEBGRmJQgREQkJiUIERGJSQlCRERiUoIQEZGYlCBERCQmJQgREYlJCUJERGJSghARkZiUIEREJCYlCBERiUkJQkREYlKCEBGRmJQgREQkJiUIERGJKeEJwsxGmNlKM1ttZrfXst+3zOywmZ2Z6JhERKRuCU0QZpYBPAhcCHwOGG1mJ8XYrx0wHliYyHjSRXFxcdghpAxdiyN0LY7QtWgcia5BDAbWuHupux8CZgIjY+z3M+AXwCcJjict6I//CF2LI3QtjtC1aByJThA9gA1RyxuDddXMbBBQ4O7/k+BYRETkM2gZ5sHNzID7gWuiV4cUjoiIRDF3T1zhZkOAye4+Ili+A3B3nxYsZwPvA3uJJIZuwC7gEndfUqOsxAUqIpLG3L1eX7wTnSBaAKuA84EtwP8Co9295Dj7zwP+j7u/nbCgREQkLgm9B+HulcDNwCvAe8BMdy8xsylm9vVYv4KamEREUkJCaxAiItJ0pVxP6ro61plZlpnNNLM1ZvammfUMI85kiONa3GZm75nZUjP7m5kVhhFnMqjD5RHxXAsz+3bwt7HczJ5IdozJEsf/kUIzm2tmS4L/JxeFEWeimdljZrbNzN6pZZ/fBJ+bS83sjLgKdveUmYgkrPeBXkAmsBQ4qcY+NwK/C+a/Q6TZKvTYQ7oWXwFaB/M/aM7XItivHfAasAA4M+y4Q/y76A+8BWQHy53DjjvEa/EIMC6YPxlYG3bcCboW/wKcAbxznO0XAS8F818EFsZTbqrVIOLpWDcSmB7MP0PkBng6qvNauPtr7n4gWFxIjT4maUQdLo+I51p8H3jI3SsA3H1nkmNMlniuxWEgO5jPBTYlMb6kcff5wJ5adhkJ/Gew7yIgx8y61lVuqiWIOjvWRe/jkZvgZWbWMTnhJVU81yLaWCBdOxuqw+UR8fxdDAAGmtl8M1tgZhcmLbrkiudaTAGuMrMNwIvALUmKLdXUvFabiOMLZagd5RpJs3/qycyuBL5ApMmp2VGHy2O0JNLMdA7QE3jdzD5fVaNoZkYDj7v7A0G/rCeIjAsncUi1GsQmIn/QVQo4tkq4ESiE6n4W2e6+OznhJVU81wIz+ypwJ/CNoJqdjuq6Fu2J/KcvNrO1wBDghTS9UR3v/5HZ7n7Y3dcBq4ETkxNeUsVzLcYCTwO4+0KgtZl1Tk54KWUTwedmIObnSU2pliAWA/3NrJeZZQFXALNr7DOHI98URwFzkxhfMtV5LYJmld8T6Xm+K4QYk6XWa+HuFe7exd37unsfIvdjvuE1euOniXj+jzwPnAsQfBieCHyY1CiTI55rUQp8FcDMTgZapfE9GeP4NefZwNVQPcJFmbtvq6vAlGpicvdKM6vqWJcBPOZBxzpgsbu/CDwGzDCzNUSG5bgivIgTJ85rcS/QFpgVNLOUuvs3w4s6MeK8Fkf9CmnaxBTPtXD3l81suJm9B3wK/Njda7uB2STF+XfxY+CPZnYbkRvW1xy/xKbLzJ4EioBOZrYemARkERna6A/u/t9m9jUzex/YB1wbV7nBY08iIiJHSbUmJhERSRFKECIiEpMShIiIxKQEISIiMSlBiIhITEoQIiISkxKESMDMKoNhoZeb2QvBK3Ebs/xrzOw3wfwkM/s/jVm+SGNTghA5Yp+7n+nupxIZGfOmsAMSCZMShEhsbxI12qWZ/djM/jd42cqkqPVXm9kyM3vbzKYH675uZgvN7C0ze8XM8kKIX6TBUmqoDZGQGVQPAnk+8GiwfAFworsPDoY0mW1m/wLsBiYAQ919j5nlBuW84e5Dgt8dC9xOZMgHkSZFCULkiBPMbAmRkS5XAH8L1g8HLgi2GZHxr04Mfs6qGufI3cuC/QvN7GmgO5E3na1N3imINB41MYkcsd/dzyQyhLRx5B6EAfcE9ycGufsAd3+8lnJ+C/zG3U8j8irY1gmNWiRBlCBEjjCA4DWu/wr82MwygJeB68ysLYCZ5Qf3FeYCo6reaGhmHYJysoHNwXxajh4qzYOamESOqB7a2N2XmtkyYLS7/1fwLoE3I7cg+Ai40t1XmNndwGtm9inwNnAdkddcPmNmu4kkkd5JPg+RRqHhvkVEJCY1MYmISExKECIiEpMShIiIxKQEISIiMSlBiIhITEoQIiISkxKEiIjEpAQhIiIx/X/0ORqhyE1CpAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f8f9e5377b8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(xPR,yPR,color='k')\n",
    "plt.xlabel(\"Recall\")\n",
    "plt.ylabel(\"Precision\")\n",
    "plt.ylim(0.4,1.01)\n",
    "plt.plot([0,1],[sum(y[9000:]) / 1000, sum(y[9000:]) / 1000], lw=0.5, color = 'grey')\n",
    "plt.title(\"Precision/Recall Curve\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = dataDir + \"beer_50000.json\"\n",
    "f = open(path)\n",
    "data = []\n",
    "for l in f:\n",
    "    data.append(eval(l))\n",
    "f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Count occurrences of each style"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "categoryCounts = defaultdict(int)\n",
    "for d in data:\n",
    "    categoryCounts[d['beer/style']] += 1\n",
    "\n",
    "categories = [c for c in categoryCounts if categoryCounts[c] > 1000]\n",
    "\n",
    "catID = dict(zip(list(categories),range(len(categories))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Build one-hot encoding using common styles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "def feat(d):\n",
    "    feat = [0] * len(catID)\n",
    "    if d['beer/style'] in catID:\n",
    "        feat[catID[d['beer/style']]] = 1\n",
    "    return feat + [1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = [feat(d) for d in data]\n",
    "y = [d['beer/ABV'] > 5 for d in data]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "                   intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
       "                   multi_class='auto', n_jobs=None, penalty='l2',\n",
       "                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
       "                   warm_start=False)"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mod = sklearn.linear_model.LogisticRegression()\n",
    "mod.fit(X,y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Compute and report metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "def metrics(y, ypred):\n",
    "    TP = sum([(a and b) for (a,b) in zip(y, ypred)])\n",
    "    TN = sum([(not a and not b) for (a,b) in zip(y, ypred)])\n",
    "    FP = sum([(not a and b) for (a,b) in zip(y, ypred)])\n",
    "    FN = sum([(a and not b) for (a,b) in zip(y, ypred)])\n",
    "\n",
    "    TPR = TP / (TP + FN)\n",
    "    TNR = TN / (TN + FP)\n",
    "\n",
    "    BER = 1 - 0.5*(TPR + TNR)\n",
    "    \n",
    "    print(\"TPR = \" + str(TPR))\n",
    "    print(\"TNR = \" + str(TNR))\n",
    "    print(\"BER = \" + str(BER))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TPR = 0.9943454210202828\n",
      "TNR = 0.27828418230563\n",
      "BER = 0.3636851983370436\n"
     ]
    }
   ],
   "source": [
    "ypred = mod.predict(X)\n",
    "metrics(y, ypred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Balance the classifier using the 'balanced' option"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1.0, class_weight='balanced', dual=False,\n",
       "                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,\n",
       "                   max_iter=100, multi_class='auto', n_jobs=None, penalty='l2',\n",
       "                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
       "                   warm_start=False)"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mod = sklearn.linear_model.LogisticRegression(class_weight='balanced')\n",
    "mod.fit(X,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TPR = 0.6779348494161033\n",
      "TNR = 0.9751206434316354\n",
      "BER = 0.1734722535761306\n"
     ]
    }
   ],
   "source": [
    "ypred = mod.predict(X)\n",
    "metrics(y, ypred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Precision/recall curves"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "probs = mod.predict_proba(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "probY = list(zip([p[1] for p in probs], [p[1] > 0.5 for p in probs], y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "probY.sort(reverse=True) # Sort data by confidence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "xPR = []\n",
    "yPR = []\n",
    "\n",
    "for i in range(1,len(probY)+1,100):\n",
    "    preds = [x[1] for x in probY[:i]]\n",
    "    labs = [x[2] for x in probY[:i]]\n",
    "    prec = sum(labs) / len(labs)\n",
    "    rec = sum(labs) / sum(y)\n",
    "    xPR.append(rec)\n",
    "    yPR.append(prec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEZCAYAAACNebLAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAHB5JREFUeJzt3X2UHXWd5/H3pzvPCYHhOYQQeUbBiDgTwlkHmwclIJIVDpow8ijOzqyIjMMuuOyazqgzgwcHRtGjgywGkYmAR0HxIQ7kGhiIZAgghoQECUknBNgQlEkgIaS/+0dVd25uft19+6Huvd35vM6pk3r43arvrXTfT1f9quoqIjAzM6vUVO8CzMysMTkgzMwsyQFhZmZJDggzM0tyQJiZWZIDwszMkhwQZmaW5ICwQUHS7ySd3EObSZJel6Ra1VUkSR+Q1FY2vUrSqfWsyXYvDgjrF0kvSHoj/2BeL+k2SWMGejsRcVxELOyhTVtEjI8BuvtT0oSOD+iK9/liUe8zoer3ImmqpPslvSZpg6RFki4psDYb4hwQ1l8BfDgixgMnAH8K/O9Uw0H4l/1ZwM/z8fL3eTzwXuDz9SqskqSTgAeABcDhEbEv8NfA9D6uz58N5oCwASGAiFhP9oF6HICkBZK+JOlhSZuBQyWNl3Rr/ld4m6QvlgeHpE9Jeib/S/13ko7P53eeXpH0Z5IWS/pjftRyQz5/sqT2jg+3/AjgXkmvSloh6fKy7cyW9ANJc/NtPS3phIr3dRbws8T7fAX4JVlQdKxvhKQbJK3Oa/qmpJFly2dIeiKveaWkD+XzLyl7v89J+ss+/h98BbgtIm6IiI15nU9ExMx8OxdLeqj8Bfm+Oiwfvy2v+X5J/wlcnb+P8v+bj0p6Kh+XpGvzmv+fpHmS9upj7dagHBA2YCRNIvtQXVI2+xPA5cAewBpgLrAVOIzsr/AP5suRdD7wBeAT+V/q5wCvJjb1z8BNEbEncDhwV9my8lMyP8i3eSBwPvD3klrKln8EuBPYE/gJ8I2y9zIMOBmYn3ifBwNnAivLZl8PHAFMyf+dmL8XJE3N3/ff5jWfDLyQv+5l4Kz8/V4K3NgRitWSNBo4CfhhD00rT1dVTs8CvhgRe5Dt403AqRXL78jHryT7//lz4CDgNeCbvanbBoGI8OChzwOwCngd2JiPfx0YmS9bALSWtd0f2NKxPJ83E3ggH/8F8JlutnNqPl4CZgP7VLSZDGwn+8NnErANGFO2/O+B/5uPzwbmly17J7C5bPpU4FeJ9/k60A78ChhftnwTcGjZ9EnA8/n4t4CvVrk/f9SxD4APAGtS+6DiNQflNR3VzXovBhZWzGsHDsvHbwO+W7H8i8Ct+fge+Xs8OJ9+BjilrO0E4C2gqd4/kx4GbvARhA2EGRGxd0QcGhGfiYitZcvaysYnA8OB9ZI2SnqN7MNzv3z5JOD3VWzvk8DRwHJJv5H04USbCcDGiHijbN5qsr/sO7xUNv4GMKrs3Hvl6SXI3ud4sg/uY4B9ASTtB4wBHs/f10ayU2379PS+JJ0p6dH8NNhrZEcm+3bz3lNeI/uwn9DL11Vqq5i+E/iopOHAucDjEbE2XzYZ+FHZ+32GLJAP6GcN1kCG1bsAGxK663wuP43RRnYEsU/kf3ZWaCM7ZdStiPg9cAGApPOAeyTtXdHsRWBvSWMjYnM+7xBgXU/rz50FfLRiXkcfxEOS5gJfzdtsIAuYYyPrh6mUfF+SRgD3kJ2Guzci2iX9iO735y4i4k1JjwLnAb/uotlmshDr2PaBqVVVrHeZpNVk+2IWWWB0WANcFhGP9qZWG1x8BGE1ExEvkZ3Tv1HSHnlH52HacX/Dd8g6R08AkHR43q+xE0l/Ianjr+w/kn2wtXcszre1FngE+AdJIyVNITvy+F43JSpf/6HAiIh4tpu2NwEflPTuPOxuAW7KjyaQNLGjIxq4FbhU0in5ez5I0lHAiHzYkIfDmcCHEtuqxv8ELpH0tx1hKek9kv41X/4UcKykKXnn+Wyqu4T2TuCzZH0Nd5fN/zZZn84h+bb2k3ROH2u3BuWAsP7q7kMmtewisg/FZ8j6Le4m60QmIu4BvgzcKel1svPxHUcG5euaDizN29wIfLzstFZ5u1nAoWRHEz8E/k9ELKii3tTppcq/rjeQdTx/IZ91LfAcsEjSH8iC8Ki87WKyDuibyAKtBEyOiE1knb1356dpZgL3VlHfrguyv+RPBU4Dfi9pA9npu/vz5SuBvyO7FHYF8FAXq6o0j6xT/YHIr47K/XNe63xJfyQL46lVrtMGCaWP9M12X5LuB74eEb+ody1m9eQjCLNdLcgHs92ajyDMzCyp0CMIZXfMvizpt920+Vp+Z+mTvb1ByMzMilP0Za63kd04dXtqYX7VxuERcaSkE8k61aZ10daHOmZmfRARfXoOWqFHEBHxMNlNPF2ZQR4eEfEbYE9JXd5oU++7ChtlmD17dt1raJTB+8L7wvui+6E/6n2j3ER2vntzXT7v5VTjhQu7fdpzYVSnh5B2td01a9bw8MMP13y7RevLdtva2njkkUf6tc1x48Yxbty45Pa7+gXr7hevt68ZM2YMBx10UBXVmtVWvQOiVy688MLO8T333JO99ir+4ZH9TeAitrtmzRpWrFhR8+0Wqa/bbWtrY9myZX3ebnt7O5s2bWLTpk1dtukquHo7v6tlr7zyCrfffjunn346EUF7e/su/6bmRQQTJkxg+PDhPbxL252USiVKpdKArKvwq5gkTQZ+EhFTEsu+BSyIiB/k08uBD0TELkcQkqJeH16NplQq0dLSUu8yGsJQ2BcLFy7koosuYsOGDUiiqampqn8BNm3axBlnnMHw4cNZv349++23X2eglA/lQVM5SOLyyy/nggsuqPOeGDhD4edioEgi+tgHUYuAeAdZQLw7sews4NMR8WFJ08ge4dxlJ7UDwmxna9euZcGCBZ3B0TFUTnc1SGLz5s1cdtllvOtd72L79u07De3t7TtNjxo1ijlz5rD//vsTEbu0f/vtt5PTU6ZM4bjjjqv37totNWxASLoTaCF7quXLZM9/GQFERPxL3uZmskcnbAYujYglXazLAWFWkOeff541a9bQ3NzcOTQ1Ne003dzczKJFi5g7dy5btmxB0k7Lhg0btkv7YcOGIYkFCxawatUqxo4dW++3uttp2IAYSA4Is8FrwoQJPP744+6Mr4P+BIQftWFmhWtqaqK9vb3nhtZQHBBmVrimpqa6XSFnfTeoLnM1s8HJRxA92759O2+99RZbt27lrbfe2mm8t/9u27aNSy+9lIkTJ/a84W44IMyscJIaMiAigq1bt/Lmm2+yZcuWARn6+qHe3t7OyJEjGTFiRL//feihhxg3bhxXXXVVv/aPA8LMClftKab29na2bNnCG2+80e3w5ptvdrusNx/mI0eOZNSoUbsMo0ePTs6vHPbee+/O8ZEjR3YOvf1gb25uHrCnGFx33XVs3ry554Y9cECYWeGampq48soraW5u7vYDf8uWLYwaNYoxY8Z0DqNHj95pOjXsv//+nW07hmo+3EeMGEFTk7tiu+KAMLPC3XLLLbz00ks9fuiPGjXKH9gNxAFhZoU75ZRT6l2C9YGj2szMkhwQZmaW5IAwM7MkB4SZmSU5IMzMLMkBYWZmSQ4IMzNLckCYmVmSA8LMzJIcEGZmluSAMDOzJAeEmZklOSDMzCzJAWFmZkkOCDMzS3JAmJlZkgPCzMySHBBmZpbkgDAzs6TCA0LSdEnLJa2QdE1i+SGS/k3SU5IelHRQ0TWZmVnPCg0ISU3AzcAZwLHALEnHVDS7AfhuRLwH+DvgH4usyczMqlP0EcRUYGVErI6IbcA8YEZFm3cBCwAiopRYbmZmdVB0QEwE2sqm1+bzyj0JnAsg6VxgnKQ/KbguMzPrQSN0Uv8PoEXS48CfA+uA7fUtyczMhhW8/nXAIWXTB+fzOkXEeuA8AEljgfMi4vXUylpbWzvHW1paaGlpGdhqzcwGuVKpxMKFCxk+fDjbtm3r17oUEQNUVmLlUjPwLHAasB54DJgVEcvK2uwDbIyIkPQl4O2IaE2sK4qs1cxsqLjuuusYM2YM1113HZKICPVlPYWeYoqI7cAVwHxgKTAvIpZJmiPp7LxZC/CspOXA/sCXi6zJzMyqU/QpJiLiF8DRFfNml43/EPhh0XWYmVnvNEIntZmZNSAHhJmZJTkgzMwsyQFhZmZJDggzM0tyQJiZWZIDwszMkhwQZmaW5IAwM7MkB4SZmSU5IMzMhqC2tjZeeeWVfq3DAWFmNsScfPLJ/PjHP+aAAw7o13oKfdz3QPLjvs3MeufAAw/k5ZdfbszHfZuZWf1IfcqFTg4IMzNLckCYmQ1RPoIwM7MkB4SZmRXCAWFmNkSNHTu2X693QJiZDVEnnnhiv17vgDAzG6Kam5v79XoHhJmZJTkgzMwsyQFhZmZJDggzM0tyQJiZWZIDwszMkgbV474XLFhQ7zLMzAaNVatWcdlll/X5cd+FB4Sk6cBNZEcrt0bE9RXLJwFzgb3yNp+PiJ8n1uPvgzAz6yVJjRkQkpqAFcBpwIvAYmBmRCwva/NtYElEfFvSO4GfRcShiXU5IMzMeqk/AVF0H8RUYGVErI6IbcA8YEZFm3ZgfD6+F7Cu4JrMzKwKwwpe/0SgrWx6LVlolJsDzJd0JTAGOL3gmszMrApFB0Q1ZgG3RcSNkqYBdwDHphq2trZ2jre0tNDS0lKL+szMBo1SqUSpVBqQdRXdBzENaI2I6fn0tUCUd1RL+h1wRkSsy6d/D5wYERsq1uU+CDOzXmrkPojFwBGSJksaAcwE7qtos5r8tFLeST2yMhzMzKz2Cg2IiNgOXAHMB5YC8yJimaQ5ks7Om10NfErSk8D3gYuLrMnMzKozqG6UGyy1mpk1ikY+xWRmZoOUA8LMzJIcEGZmluSAMDOzJAeEmZklOSDMzCzJAWFmZkkOCDMzS3JAmJlZkgPCzMySqn7ct6SJwOTy10TEwiKKMjOz+qsqICRdD3wceAbYns8OwAFhZjZEVfWwPknPAlMiYmvxJXVZgx/WZ2bWS7V4WN/zwPC+bMDMzAanavsg3gCelPQA0HkUERFXFlKVmZnVXbUBcR+7fhOcmZkNYVV/YVD+laFH5ZPPRsS2wqpKb999EGZmvdSfPohqr2JqAeYCLwACJkm62Je5mpkNXdVexfQ4cEFEPJtPHwX8a0S8r+D6ymvwEYSZWS/V4iqm4R3hABARK/BVTWZmQ1q1ndT/Iek7wB359F8A/1FMSWZm1giqPcU0Evg08P581kPAN2t545xPMZmZ9V5/TjFVfRVTvTkgzMx6r7CrmCTdFREfk/Q02bOXdhIRU/qyUTMza3zdHkFImhAR6yVNTi2PiNWFVbZrLT6CMDPrpcKuYoqI9fnoBqAtD4SRwHuAF/uyQTMzGxyqvcx1ITAq/06I+cCFwHeLKsrMzOqv2oBQRLwBnEt29dL5wLHFlWVmZvVWdUBIOons/of783nNVb5wuqTlklZIuiax/J8kPSFpiaRnJW2ssiYzMytQtTfKXQV8HvhRRCyVdBiwoKcXSWoCbgZOI+uzWCzp3ohY3tEmIj5X1v4K4Phe1G9mZgUp9D4ISdOA2RFxZj59LRARcX0X7f8d+EJEPJBY5quYzMx6qcj7IG6KiKsk/YT0fRDn9LD+iUBb2fRaYGoX2zoEeAfwYA/rNDOzGujpFNP38n9vKLoQYCZwT3eHCa2trZ3jLS0ttLS0FF+VmdkgUiqVKJVKA7Kuap/FNBZ4MyLa8+lmYGR+ZVN3r5sGtEbE9Hy6y1NMkpYA/z0iFnWxLp9iMjPrpVo87vsBYEzZ9Gjg36p43WLgCEmT82+km0niq0slHQPs1VU4mJlZ7VUbEKMiYlPHRD4+ppv2He22A1eQ3Vy3FJgXEcskzZF0dlnTjwPzqi/bzMyKVu0ppn8HPhMRS/Lp9wE3R8RJBddXXoNPMZmZ9VLh30lNdh/E3ZJeJPtO6gPJ/uo3M7Mhqur7ICQNB47OJ5+NiG2FVZXevo8gzMx6qfBOakljgGuAz0bE74B3VPQhmJnZEFNtJ/VtwFtAR5/DOuBLhVRkZmYNodqAODwivgJsA8jvf+jTIYuZmQ0O1QbEW5JGkz9uQ9LhwNbCqjIzs7qr9iqm2cAvgEmSvg/8F+CSoooyM7P66/EqJkkCDgbeAKaRnVpaFBEbii9vpzp8FZOZWS/15yqmam+Uezoi3t2XDQwUB4SZWe/V4llMSyT9WV82YGZmg1O1RxDLgSOBF4DNZKeZIiKmFFrdzjX4CMLMrJdq8aiNM/qycjMzG7x6+ka5UcBfAUcATwO3RsTbtSjMzMzqq6c+iLnAn5KFw5nAVwuvyMzMGkK3fRDlVy9JGgY8FhEn1Kq4ilrcB2Fm1ktFXsXU+cRWn1oyM9u99HQEsZ3sqiXIrlwaTXbDXMdVTOMLr3BHLT6CMDPrpcKuYoqI5r6VZGZmg121N8qZmdluxgFhZmZJDggzM0tyQJiZWZIDwszMkhwQZmaW5IAwM7MkB4SZmSU5IMzMLKnwgJA0XdJySSskXdNFm49JWirpaUl3FF2TmZn1rKpvlOvzyqUmYAVwGvAisBiYGRHLy9ocAfwAOCUiXpe0b0RsSKzLz2IyM+ulWnwndV9NBVZGxOqI2AbMA2ZUtPkU8I2IeB0gFQ5mZlZ7RQfERKCtbHptPq/cUcDRkh6W9Igkf72pmVkDqPY7qYs0jOwrTU8GDgEWSjqu44jCzMzqo+iAWEf2od/h4HxeubXAoohoB16QtAI4Eni8cmWtra2d4y0tLbS0tAxwuWZmg1upVKJUKg3IuorupG4GniXrpF4PPAbMiohlZW3OyOddImlfsmA4PiJeq1iXO6nNzHqpYTupI2I7cAUwH1gKzIuIZZLmSDo7b/NL4FVJS4EHgKsrw8HMzGqv0COIgeQjCDOz3mvYIwgzMxu8HBBmZpbkgDAzsyQHhJmZJTkgzMwsyQFhZmZJDggzM0tyQJiZWZIDwszMkhwQZmaW5IAwM7MkB4SZmSU5IMzMLMkBYWZmSQ4IMzNLckCYmVmSA8LMzJIcEGZmluSAMDOzJAeEmZklOSDMzCzJAWFmZkkOCDMzS3JAmJlZkgPCzMySHBBmZpbkgDAzsyQHhJmZJRUeEJKmS1ouaYWkaxLLL5b0iqQl+XBZ0TWZmVnPhhW5cklNwM3AacCLwGJJ90bE8oqm8yLiyiJrMTOz3in6CGIqsDIiVkfENmAeMCPRTgXXYWZmvVR0QEwE2sqm1+bzKp0r6UlJd0k6uOCazMysCoWeYqrSfcCdEbFN0l8Cc8lOSe2itbW1c7ylpYWWlpZa1GdmNmiUSiVKpdKArEsRMSArSq5cmga0RsT0fPpaICLi+i7aNwEbI2KvxLIoslYzs6FIEhHRp9P4RZ9iWgwcIWmypBHATLIjhk6SDiybnAE8U3BNZmZWhUJPMUXEdklXAPPJwujWiFgmaQ6wOCJ+Clwp6RxgG7ARuKTImszMrDqFnmIaSD7FZGbWe418isnMzAYpB4SZmSU5IMzMLMkBYWZmSQ4IMzNLckCYmVmSA8LMzJIcEGZmluSAMDOzJAeEmZklOSDMzCzJAWFmZkkOCDMzS3JAmJlZkgPCzMySHBBmZpbkgDAzsyQHhJmZJTkgzMwsyQFhZmZJDggzM0tyQJiZWZIDwszMkhwQZmaW5IAwM7MkB4SZmSU5IMzMLKnwgJA0XdJySSskXdNNu/MktUs6oeiazMysZ4UGhKQm4GbgDOBYYJakYxLtxgFXAouKrGeoKJVK9S6hYXhf7OB9sYP3xcAo+ghiKrAyIlZHxDZgHjAj0e6LwD8CWwuuZ0jwD/8O3hc7eF/s4H0xMIoOiIlAW9n02nxeJ0nvBQ6OiJ8XXIuZmfXCsHpuXJKAfwIuLp9dp3LMzKyMIqK4lUvTgNaImJ5PXwtERFyfT48HngM2kQXDgcCrwDkRsaRiXcUVamY2hEVEn/7wLjogmoFngdOA9cBjwKyIWNZF+wXA5yLiicKKMjOzqhTaBxER24ErgPnAUmBeRCyTNEfS2amX4FNMZmYNodAjCDMzG7wa7k7qnm6skzRC0jxJKyU9KumQetRZC1Xsi7+RtFTSk5J+JWlSPeqsBd9wuUM1+0LSx/Kfjacl3VHrGmulit+RSZIelLQk/z05sx51Fk3SrZJelvTbbtp8Lf/cfFLS8VWtOCIaZiALrOeAycBw4EngmIo2fw18Mx//ONlpq7rXXqd98QFgVD7+V7vzvsjbjQN+DTwCnFDvuuv4c3EE8DgwPp/et95113FffBv4b/n4O4FV9a67oH3xfuB44LddLD8TuD8fPxFYVM16G+0Iopob62YAc/Pxe8g6wIeiHvdFRPw6Irbkk4uouMdkCPENlztUsy8+BXwjIl4HiIgNNa6xVqrZF+3A+Hx8L2BdDeurmYh4GHitmyYzgNvztr8B9pR0QE/rbbSA6PHGuvI2kXWC/0HS3rUpr6aq2RflPgkM1ZsNfcPlDtX8XBwFHC3pYUmPSDqjZtXVVjX7Yg5woaQ24KfAZ2pUW6Op3FfrqOIPyrreKDdAdvurniR9Angf2Smn3Y5vuNzFMLLTTCcDhwALJR3XcUSxm5kF3BYRN+b3Zd1B9lw4q0KjHUGsI/uB7nAwux4SrgUmQed9FuMjYmNtyqupavYFkk4HPg98JD/MHop62hd7kP3SlyStAqYB9w7Rjupqf0fui4j2iHgBWAEcWZvyaqqaffFJ4C6AiFgEjJK0b23KayjryD83c8nPk0qNFhCLgSMkTZY0ApgJ3FfR5ifs+EvxfODBGtZXSz3ui/y0yrfI7jx/tQ411kq3+yIiXo+I/SPisIg4lKw/5iNRcTf+EFHN78iPgVMA8g/DI4Hna1plbVSzL1YDpwNIeicwcgj3yYiuj5zvAy6Czidc/CEiXu5phQ11iikitkvquLGuCbg18hvrgMUR8VPgVuB7klaSPZZjZv0qLk6V++IrwFjg7vw0y+qI+K/1q7oYVe6LnV7CED3FVM2+iIhfSvqQpKXA28DVEdFdB+agVOXPxdXALZL+hqzD+uKu1zh4SboTaAH2kbQGmA2MIHu00b9ExM8knSXpOWAzcGlV680vezIzM9tJo51iMjOzBuGAMDOzJAeEmZklOSDMzCzJAWFmZkkOCDMzS3JAmOUkbc8fC/20pHvzr8QdyPVfLOlr+fhsSZ8byPWbDTQHhNkOmyPihIh4N9mTMT9d74LM6skBYZb2KGVPu5R0taTH8i9bmV02/yJJT0l6QtLcfN7ZkhZJelzSfEn71aF+s35rqEdtmNWZoPMhkKcB38mnPwgcGRFT80ea3Cfp/cBG4H8BJ0XEa5L2ytfzUERMy1/7SeAaskc+mA0qDgizHUZLWkL2pMtngF/l8z8EfDBfJrLnXx2Z/3t3x3OOIuIPeftJku4CJpB909mq2r0Fs4HjU0xmO7wRESeQPUJa7OiDEPAPef/EeyPiqIi4rZv1fB34WkRMIfsq2FGFVm1WEAeE2Q4CyL/G9bPA1ZKagF8Cl0kaCyDpoLxf4UHg/I5vNJT0J/l6xgMv5uND8umhtnvwKSazHTofbRwRT0p6CpgVEd/Pv0vg0awLgv8EPhERz0j6MvBrSW8DTwCXkX3N5T2SNpKFyDtq/D7MBoQf921mZkk+xWRmZkkOCDMzS3JAmJlZkgPCzMySHBBmZpbkgDAzsyQHhJmZJTkgzMws6f8DC7oFqZuio7sAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f8f97cad978>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(xPR,yPR,color='k')\n",
    "plt.xlabel(\"Recall\")\n",
    "plt.ylabel(\"Precision\")\n",
    "plt.ylim(0.4,1.01)\n",
    "plt.plot([0,1],[sum(y) / len(y), sum(y) / len(y)], lw=0.5, color = 'grey')\n",
    "plt.title(\"Precision/Recall Curve\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Model pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataTrain = data[:25000]\n",
    "dataValid = data[25000:37500]\n",
    "dataTest = data[37500:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pipeline(reg):\n",
    "    mod = linear_model.LogisticRegression(C=reg, class_weight='balanced')\n",
    "    \n",
    "    X = [feat(d) for d in dataTrain]\n",
    "    y = [d['beer/ABV'] > 5 for d in dataTrain]\n",
    "\n",
    "    Xvalid = [feat(d) for d in dataValid]\n",
    "    yvalid = [d['beer/ABV'] > 5 for d in dataValid]\n",
    "    Xtest = [feat(d) for d in dataTest]\n",
    "    ytest = [d['beer/ABV'] > 5 for d in dataTest]\n",
    "    \n",
    "    mod.fit(X,y)\n",
    "    ypredValid = mod.predict(Xvalid)\n",
    "    ypredTest = mod.predict(Xtest)\n",
    "    \n",
    "    # validation\n",
    "    \n",
    "    TP = sum([(a and b) for (a,b) in zip(yvalid, ypredValid)])\n",
    "    TN = sum([(not a and not b) for (a,b) in zip(yvalid, ypredValid)])\n",
    "    FP = sum([(not a and b) for (a,b) in zip(yvalid, ypredValid)])\n",
    "    FN = sum([(a and not b) for (a,b) in zip(yvalid, ypredValid)])\n",
    "    \n",
    "    TPR = TP / (TP + FN)\n",
    "    TNR = TN / (TN + FP)\n",
    "    \n",
    "    BER = 1 - 0.5*(TPR + TNR)\n",
    "    \n",
    "    print(\"C = \" + str(reg) + \"; validation BER = \" + str(BER))\n",
    "    \n",
    "    # test\n",
    "\n",
    "    TP = sum([(a and b) for (a,b) in zip(ytest, ypredTest)])\n",
    "    TN = sum([(not a and not b) for (a,b) in zip(ytest, ypredTest)])\n",
    "    FP = sum([(not a and b) for (a,b) in zip(ytest, ypredTest)])\n",
    "    FN = sum([(a and not b) for (a,b) in zip(ytest, ypredTest)])\n",
    "    \n",
    "    TPR = TP / (TP + FN)\n",
    "    TNR = TN / (TN + FP)\n",
    "    \n",
    "    BER = 1 - 0.5*(TPR + TNR)\n",
    "    \n",
    "    print(\"C = \" + str(reg) + \"; test BER = \" + str(BER))\n",
    "\n",
    "    return mod"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C = 1e-06; validation BER = 0.16646546520651895\n",
      "C = 1e-06; test BER = 0.2640150292967679\n",
      "C = 1e-05; validation BER = 0.28744502888678103\n",
      "C = 1e-05; test BER = 0.39631747406856366\n",
      "C = 0.0001; validation BER = 0.28744502888678103\n",
      "C = 0.0001; test BER = 0.39631747406856366\n",
      "C = 0.001; validation BER = 0.28744502888678103\n",
      "C = 0.001; test BER = 0.39631747406856366\n"
     ]
    }
   ],
   "source": [
    "for c in [0.000001, 0.00001, 0.0001, 0.001]:\n",
    "    pipeline(c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fit the classification problem using a regular linear regressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_reg = [2.0*a - 1 for a in y] # Map data to {-1.0,1.0}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[-1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0]"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_reg[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None, normalize=False)"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = sklearn.linear_model.LinearRegression(fit_intercept=False)\n",
    "model.fit(X, y_reg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "yreg_pred = model.predict(X)\n",
    "yreg_pred = [a > 0 for a in yreg_pred] # Map the outputs back to binary predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TPR = 0.9943454210202828\n",
      "TNR = 0.27828418230563\n",
      "BER = 0.3636851983370436\n"
     ]
    }
   ],
   "source": [
    "metrics(y, yreg_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
