Delete Multi-Layered_Perceptron.ipynb

dd8fb74f · ashepley · 7cf661eb · 7cf661eb
Commit dd8fb74f authored 4 years ago by ashepley
--- a/topic_39/topic_41/Multi-Layered_Perceptron.ipynb
+++ b/topic_39/topic_41/Multi-Layered_Perceptron.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Classification MLP"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#import libraries\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "from sklearn.model_selection import *\n",
-    "from sklearn.linear_model import *\n",
-    "from sklearn import metrics\n",
-    "from sklearn.neural_network import MLPClassifier\n",
-    "\n",
-    "def check_NaN(dataframe):\n",
-    "    print(\"Total NaN:\", dataframe.isnull().values.sum())\n",
-    "    print(\"NaN by column:\\n\",dataframe.isnull().sum())\n",
-    "    return\n",
-    "\n",
-    "def one_hot_encode(dataframe, col_name):\n",
-    "    dataframe = pd.get_dummies(dataframe, columns=[col_name], prefix = [col_name])\n",
-    "    return dataframe"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Using a Multi-Layered Perceptron (MLP) to Classify Mushrooms as Edible or Poisonous\n",
-    "In this Notebook, we'll be using the mushroom classification dataset, which you can find here https://www.kaggle.com/uciml/mushroom-classification to train an MLP to determine whether a mushroom is edible (e) or poisonous (p), based on its physical characteristics."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#load the dataset\n",
-    "data = pd.read_csv(\"./datasets/mushrooms.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>class</th>\n",
-       "      <th>cap-shape</th>\n",
-       "      <th>cap-surface</th>\n",
-       "      <th>cap-color</th>\n",
-       "      <th>bruises</th>\n",
-       "      <th>odor</th>\n",
-       "      <th>gill-attachment</th>\n",
-       "      <th>gill-spacing</th>\n",
-       "      <th>gill-size</th>\n",
-       "      <th>gill-color</th>\n",
-       "      <th>...</th>\n",
-       "      <th>stalk-surface-below-ring</th>\n",
-       "      <th>stalk-color-above-ring</th>\n",
-       "      <th>stalk-color-below-ring</th>\n",
-       "      <th>veil-type</th>\n",
-       "      <th>veil-color</th>\n",
-       "      <th>ring-number</th>\n",
-       "      <th>ring-type</th>\n",
-       "      <th>spore-print-color</th>\n",
-       "      <th>population</th>\n",
-       "      <th>habitat</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>p</td>\n",
-       "      <td>x</td>\n",
-       "      <td>s</td>\n",
-       "      <td>n</td>\n",
-       "      <td>t</td>\n",
-       "      <td>p</td>\n",
-       "      <td>f</td>\n",
-       "      <td>c</td>\n",
-       "      <td>n</td>\n",
-       "      <td>k</td>\n",
-       "      <td>...</td>\n",
-       "      <td>s</td>\n",
-       "      <td>w</td>\n",
-       "      <td>w</td>\n",
-       "      <td>p</td>\n",
-       "      <td>w</td>\n",
-       "      <td>o</td>\n",
-       "      <td>p</td>\n",
-       "      <td>k</td>\n",
-       "      <td>s</td>\n",
-       "      <td>u</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>e</td>\n",
-       "      <td>x</td>\n",
-       "      <td>s</td>\n",
-       "      <td>y</td>\n",
-       "      <td>t</td>\n",
-       "      <td>a</td>\n",
-       "      <td>f</td>\n",
-       "      <td>c</td>\n",
-       "      <td>b</td>\n",
-       "      <td>k</td>\n",
-       "      <td>...</td>\n",
-       "      <td>s</td>\n",
-       "      <td>w</td>\n",
-       "      <td>w</td>\n",
-       "      <td>p</td>\n",
-       "      <td>w</td>\n",
-       "      <td>o</td>\n",
-       "      <td>p</td>\n",
-       "      <td>n</td>\n",
-       "      <td>n</td>\n",
-       "      <td>g</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>e</td>\n",
-       "      <td>b</td>\n",
-       "      <td>s</td>\n",
-       "      <td>w</td>\n",
-       "      <td>t</td>\n",
-       "      <td>l</td>\n",
-       "      <td>f</td>\n",
-       "      <td>c</td>\n",
-       "      <td>b</td>\n",
-       "      <td>n</td>\n",
-       "      <td>...</td>\n",
-       "      <td>s</td>\n",
-       "      <td>w</td>\n",
-       "      <td>w</td>\n",
-       "      <td>p</td>\n",
-       "      <td>w</td>\n",
-       "      <td>o</td>\n",
-       "      <td>p</td>\n",
-       "      <td>n</td>\n",
-       "      <td>n</td>\n",
-       "      <td>m</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>p</td>\n",
-       "      <td>x</td>\n",
-       "      <td>y</td>\n",
-       "      <td>w</td>\n",
-       "      <td>t</td>\n",
-       "      <td>p</td>\n",
-       "      <td>f</td>\n",
-       "      <td>c</td>\n",
-       "      <td>n</td>\n",
-       "      <td>n</td>\n",
-       "      <td>...</td>\n",
-       "      <td>s</td>\n",
-       "      <td>w</td>\n",
-       "      <td>w</td>\n",
-       "      <td>p</td>\n",
-       "      <td>w</td>\n",
-       "      <td>o</td>\n",
-       "      <td>p</td>\n",
-       "      <td>k</td>\n",
-       "      <td>s</td>\n",
-       "      <td>u</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>e</td>\n",
-       "      <td>x</td>\n",
-       "      <td>s</td>\n",
-       "      <td>g</td>\n",
-       "      <td>f</td>\n",
-       "      <td>n</td>\n",
-       "      <td>f</td>\n",
-       "      <td>w</td>\n",
-       "      <td>b</td>\n",
-       "      <td>k</td>\n",
-       "      <td>...</td>\n",
-       "      <td>s</td>\n",
-       "      <td>w</td>\n",
-       "      <td>w</td>\n",
-       "      <td>p</td>\n",
-       "      <td>w</td>\n",
-       "      <td>o</td>\n",
-       "      <td>e</td>\n",
-       "      <td>n</td>\n",
-       "      <td>a</td>\n",
-       "      <td>g</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 23 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "  class cap-shape cap-surface cap-color bruises odor gill-attachment  \\\n",
-       "0     p         x           s         n       t    p               f   \n",
-       "1     e         x           s         y       t    a               f   \n",
-       "2     e         b           s         w       t    l               f   \n",
-       "3     p         x           y         w       t    p               f   \n",
-       "4     e         x           s         g       f    n               f   \n",
-       "\n",
-       "  gill-spacing gill-size gill-color   ...   stalk-surface-below-ring  \\\n",
-       "0            c         n          k   ...                          s   \n",
-       "1            c         b          k   ...                          s   \n",
-       "2            c         b          n   ...                          s   \n",
-       "3            c         n          n   ...                          s   \n",
-       "4            w         b          k   ...                          s   \n",
-       "\n",
-       "  stalk-color-above-ring stalk-color-below-ring veil-type veil-color  \\\n",
-       "0                      w                      w         p          w   \n",
-       "1                      w                      w         p          w   \n",
-       "2                      w                      w         p          w   \n",
-       "3                      w                      w         p          w   \n",
-       "4                      w                      w         p          w   \n",
-       "\n",
-       "  ring-number ring-type spore-print-color population habitat  \n",
-       "0           o         p                 k          s       u  \n",
-       "1           o         p                 n          n       g  \n",
-       "2           o         p                 n          n       m  \n",
-       "3           o         p                 k          s       u  \n",
-       "4           o         e                 n          a       g  \n",
-       "\n",
-       "[5 rows x 23 columns]"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#check out its features\n",
-    "data.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's choose gill-size (narrow or broad) and spore print color as our features. Note spore-print-color: black=k, brown=n, buff=b, chocolate=h, green=r, orange=o, purple=u, white=w, yellow=y"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>class</th>\n",
-       "      <th>gill-size</th>\n",
-       "      <th>spore-print-color</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>p</td>\n",
-       "      <td>n</td>\n",
-       "      <td>k</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>e</td>\n",
-       "      <td>b</td>\n",
-       "      <td>n</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>e</td>\n",
-       "      <td>b</td>\n",
-       "      <td>n</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>p</td>\n",
-       "      <td>n</td>\n",
-       "      <td>k</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>e</td>\n",
-       "      <td>b</td>\n",
-       "      <td>n</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "  class gill-size spore-print-color\n",
-       "0     p         n                 k\n",
-       "1     e         b                 n\n",
-       "2     e         b                 n\n",
-       "3     p         n                 k\n",
-       "4     e         b                 n"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "chosen_features = data.filter(['class','gill-size','spore-print-color'])\n",
-    "chosen_features.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Total NaN: 0\n",
-      "NaN by column:\n",
-      " class                0\n",
-      "gill-size            0\n",
-      "spore-print-color    0\n",
-      "dtype: int64\n"
-     ]
-    }
-   ],
-   "source": [
-    "#always remember to check for NaN values\n",
-    "check_NaN(chosen_features)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "One hot encode the chosen features"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>class_e</th>\n",
-       "      <th>class_p</th>\n",
-       "      <th>gill-size_b</th>\n",
-       "      <th>gill-size_n</th>\n",
-       "      <th>spore-print-color_b</th>\n",
-       "      <th>spore-print-color_h</th>\n",
-       "      <th>spore-print-color_k</th>\n",
-       "      <th>spore-print-color_n</th>\n",
-       "      <th>spore-print-color_o</th>\n",
-       "      <th>spore-print-color_r</th>\n",
-       "      <th>spore-print-color_u</th>\n",
-       "      <th>spore-print-color_w</th>\n",
-       "      <th>spore-print-color_y</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   class_e  class_p  gill-size_b  gill-size_n  spore-print-color_b  \\\n",
-       "0        0        1            0            1                    0   \n",
-       "1        1        0            1            0                    0   \n",
-       "2        1        0            1            0                    0   \n",
-       "3        0        1            0            1                    0   \n",
-       "4        1        0            1            0                    0   \n",
-       "\n",
-       "   spore-print-color_h  spore-print-color_k  spore-print-color_n  \\\n",
-       "0                    0                    1                    0   \n",
-       "1                    0                    0                    1   \n",
-       "2                    0                    0                    1   \n",
-       "3                    0                    1                    0   \n",
-       "4                    0                    0                    1   \n",
-       "\n",
-       "   spore-print-color_o  spore-print-color_r  spore-print-color_u  \\\n",
-       "0                    0                    0                    0   \n",
-       "1                    0                    0                    0   \n",
-       "2                    0                    0                    0   \n",
-       "3                    0                    0                    0   \n",
-       "4                    0                    0                    0   \n",
-       "\n",
-       "   spore-print-color_w  spore-print-color_y  \n",
-       "0                    0                    0  \n",
-       "1                    0                    0  \n",
-       "2                    0                    0  \n",
-       "3                    0                    0  \n",
-       "4                    0                    0  "
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "subset = one_hot_encode(chosen_features, 'class')\n",
-    "subset = one_hot_encode(subset, 'gill-size')\n",
-    "subset = one_hot_encode(subset, 'spore-print-color')\n",
-    "subset.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now, let's use just 'class_e' as the target label. This means if the Perceptron returns a value of 1, then the mushroom is edible. If it returns 0, then the mushroom is poisonous. Let's also pick 'gill-size_b', because the only other value it can be is 'gill-size_n', which means the gill size will be broad when it = 1, and narrow when it = 0. We'll pick all the colours to train on. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>class_e</th>\n",
-       "      <th>gill-size_b</th>\n",
-       "      <th>spore-print-color_h</th>\n",
-       "      <th>spore-print-color_h</th>\n",
-       "      <th>spore-print-color_k</th>\n",
-       "      <th>spore-print-color_n</th>\n",
-       "      <th>spore-print-color_o</th>\n",
-       "      <th>spore-print-color_r</th>\n",
-       "      <th>spore-print-color_u</th>\n",
-       "      <th>spore-print-color_w</th>\n",
-       "      <th>spore-print-color_y</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   class_e  gill-size_b  spore-print-color_h  spore-print-color_h  \\\n",
-       "0        0            0                    0                    0   \n",
-       "1        1            1                    0                    0   \n",
-       "2        1            1                    0                    0   \n",
-       "3        0            0                    0                    0   \n",
-       "4        1            1                    0                    0   \n",
-       "\n",
-       "   spore-print-color_k  spore-print-color_n  spore-print-color_o  \\\n",
-       "0                    1                    0                    0   \n",
-       "1                    0                    1                    0   \n",
-       "2                    0                    1                    0   \n",
-       "3                    1                    0                    0   \n",
-       "4                    0                    1                    0   \n",
-       "\n",
-       "   spore-print-color_r  spore-print-color_u  spore-print-color_w  \\\n",
-       "0                    0                    0                    0   \n",
-       "1                    0                    0                    0   \n",
-       "2                    0                    0                    0   \n",
-       "3                    0                    0                    0   \n",
-       "4                    0                    0                    0   \n",
-       "\n",
-       "   spore-print-color_y  \n",
-       "0                    0  \n",
-       "1                    0  \n",
-       "2                    0  \n",
-       "3                    0  \n",
-       "4                    0  "
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "final = subset.filter(['class_e','gill-size_b','spore-print-color_h','spore-print-color_h','spore-print-color_k','spore-print-color_n','spore-print-color_o','spore-print-color_r','spore-print-color_u','spore-print-color_w','spore-print-color_y'])\n",
-    "final.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "x train/test  (6499, 10) (1625, 10)\n",
-      "y train/test  (6499,) (1625,)\n"
-     ]
-    }
-   ],
-   "source": [
-    "#Create the train/test splits as we did before\n",
-    "x_train, x_test, y_train, y_test = train_test_split(final.drop(['class_e'], axis=1),final['class_e'],test_size=0.2,random_state=1)                                                                       \n",
-    "print(\"x train/test \",x_train.shape, x_test.shape)\n",
-    "print(\"y train/test \",y_train.shape, y_test.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Convert them from pandas to numpy arrays\n",
-    "x = x_train.values\n",
-    "y = y_train.values\n",
-    "x_t = x_test.values\n",
-    "y_t = y_test.values"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### MLP Training and Evaluation\n",
-    "Let's create an MLP. Currently the only loss function it supports is the Cross-Entropy loss function, which is used by default. By default, it uses the ReLU activation function. It also has a default of 1 hidden layer, containing 100 neurons. \n",
-    "\n",
-    "Here are some parameter options you can explore:\n",
-    "* activation: {‘identity’, ‘logistic’, ‘tanh’, ‘relu’}, default=‘relu’\n",
-    "* hidden_layer_sizes: tuple, length = n_layers - 2, default=(100,), where the ith element represents the number of neurons in the ith hidden layer.\n",
-    "* Find out more here: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "MLP = MLPClassifier() #activation='logistic', hidden_layer_sizes=(1,),hidden_layer_sizes=(1,)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n",
-       "       beta_2=0.999, early_stopping=False, epsilon=1e-08,\n",
-       "       hidden_layer_sizes=(100,), learning_rate='constant',\n",
-       "       learning_rate_init=0.001, max_iter=200, momentum=0.9,\n",
-       "       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,\n",
-       "       random_state=None, shuffle=True, solver='adam', tol=0.0001,\n",
-       "       validation_fraction=0.1, verbose=False, warm_start=False)"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#train the mlp\n",
-    "MLP.fit(x, y)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Mean squared error:  0.023384615384615386\n",
-      "Accuracy: 98.0%\n"
-     ]
-    }
-   ],
-   "source": [
-    "predictions = MLP.predict(x_t)\n",
-    "#Calculate the mean squared error and accuracy\n",
-    "print(\"Mean squared error: \",np.mean((predictions - y_t) ** 2))\n",
-    "print(\"Accuracy:\",str(round(metrics.accuracy_score(y_t, predictions)*100))+\"%\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Test a mushroom with a broad gill-size and a white spore print color. In the feature vector below, index = 0 is gill-size_b and index = 8 is white (spore-print-color_w); black (spore-print-color_k) would be index = 3."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_mushroom = [1,0,0,0,0,0,0,0,1,0]\n",
-    "prediction = MLP.predict([test_mushroom])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if prediction==1:\n",
-    "    print('Edible')\n",
-    "else:\n",
-    "    print('Poisonous')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Exercise:\n",
-    "Have a go at changing some of the learning parameters, e.g. add more layers, or more neurons per layer, or change the activation function, and see if you can improve performance beyond 98%!"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.1"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
-%% Cell type:markdown id: tags:
-
-## Classification MLP
-
-%% Cell type:code id: tags:
-
-``` python
-#import libraries
-import numpy as np
-import pandas as pd
-from sklearn.model_selection import *
-from sklearn.linear_model import *
-from sklearn import metrics
-from sklearn.neural_network import MLPClassifier
-
-def check_NaN(dataframe):
-    print("Total NaN:", dataframe.isnull().values.sum())
-    print("NaN by column:\n",dataframe.isnull().sum())
-    return
-
-def one_hot_encode(dataframe, col_name):
-    dataframe = pd.get_dummies(dataframe, columns=[col_name], prefix = [col_name])
-    return dataframe
-```
-
-%% Cell type:markdown id: tags:
-
-### Using a Multi-Layered Perceptron (MLP) to Classify Mushrooms as Edible or Poisonous
-In this Notebook, we'll be using the mushroom classification dataset, which you can find here https://www.kaggle.com/uciml/mushroom-classification to train an MLP to determine whether a mushroom is edible (e) or poisonous (p), based on its physical characteristics.
-
-%% Cell type:code id: tags:
-
-``` python
-#load the dataset
-data = pd.read_csv("./datasets/mushrooms.csv")
-```
-
-%% Cell type:code id: tags:
-
-``` python
-#check out its features
-data.head()
-```
-
-%% Output
-
-      class cap-shape cap-surface cap-color bruises odor gill-attachment  \
-    0     p         x           s         n       t    p               f
-    1     e         x           s         y       t    a               f
-    2     e         b           s         w       t    l               f
-    3     p         x           y         w       t    p               f
-    4     e         x           s         g       f    n               f
-    
-      gill-spacing gill-size gill-color   ...   stalk-surface-below-ring  \
-    0            c         n          k   ...                          s
-    1            c         b          k   ...                          s
-    2            c         b          n   ...                          s
-    3            c         n          n   ...                          s
-    4            w         b          k   ...                          s
-    
-      stalk-color-above-ring stalk-color-below-ring veil-type veil-color  \
-    0                      w                      w         p          w
-    1                      w                      w         p          w
-    2                      w                      w         p          w
-    3                      w                      w         p          w
-    4                      w                      w         p          w
-    
-      ring-number ring-type spore-print-color population habitat
-    0           o         p                 k          s       u
-    1           o         p                 n          n       g
-    2           o         p                 n          n       m
-    3           o         p                 k          s       u
-    4           o         e                 n          a       g
-    
-    [5 rows x 23 columns]
-
-%% Cell type:markdown id: tags:
-
-Let's choose gill-size (narrow or broad) and spore print color as our features. Note spore-print-color: black=k, brown=n, buff=b, chocolate=h, green=r, orange=o, purple=u, white=w, yellow=y
-
-%% Cell type:code id: tags:
-
-``` python
-chosen_features = data.filter(['class','gill-size','spore-print-color'])
-chosen_features.head()
-```
-
-%% Output
-
-      class gill-size spore-print-color
-    0     p         n                 k
-    1     e         b                 n
-    2     e         b                 n
-    3     p         n                 k
-    4     e         b                 n
-
-%% Cell type:code id: tags:
-
-``` python
-# Always remember to check for NaN values: prints the total count and the count per column.
-check_NaN(chosen_features)
-```
-
-%% Output
-
-    Total NaN: 0
-    NaN by column:
-     class                0
-    gill-size            0
-    spore-print-color    0
-    dtype: int64
-
-%% Cell type:markdown id: tags:
-
-One hot encode the chosen features
-
-%% Cell type:code id: tags:
-
-``` python
-subset = one_hot_encode(chosen_features, 'class')
-subset = one_hot_encode(subset, 'gill-size')
-subset = one_hot_encode(subset, 'spore-print-color')
-subset.head()
-```
-
-%% Output
-
-       class_e  class_p  gill-size_b  gill-size_n  spore-print-color_b  \
-    0        0        1            0            1                    0
-    1        1        0            1            0                    0
-    2        1        0            1            0                    0
-    3        0        1            0            1                    0
-    4        1        0            1            0                    0
-    
-       spore-print-color_h  spore-print-color_k  spore-print-color_n  \
-    0                    0                    1                    0
-    1                    0                    0                    1
-    2                    0                    0                    1
-    3                    0                    1                    0
-    4                    0                    0                    1
-    
-       spore-print-color_o  spore-print-color_r  spore-print-color_u  \
-    0                    0                    0                    0
-    1                    0                    0                    0
-    2                    0                    0                    0
-    3                    0                    0                    0
-    4                    0                    0                    0
-    
-       spore-print-color_w  spore-print-color_y
-    0                    0                    0
-    1                    0                    0
-    2                    0                    0
-    3                    0                    0
-    4                    0                    0
-
-%% Cell type:markdown id: tags:
-
-Now, let's just pick the 'class_e' column as our label. This means if the Perceptron returns a value of 1, then the mushroom is edible. If it returns 0, then the mushroom is poisonous. Let's also pick 'gill-size_b' only, because the only other value gill size can take is 'gill-size_n': the gill size is broad when 'gill-size_b' = 1, and narrow when it = 0. We'll pick all the spore print colours to train on.
-
-%% Cell type:code id: tags:
-
-``` python
-final = subset.filter(['class_e','gill-size_b','spore-print-color_h','spore-print-color_h','spore-print-color_k','spore-print-color_n','spore-print-color_o','spore-print-color_r','spore-print-color_u','spore-print-color_w','spore-print-color_y'])
-final.head()
-```
-
-%% Output
-
-       class_e  gill-size_b  spore-print-color_h  spore-print-color_h  \
-    0        0            0                    0                    0
-    1        1            1                    0                    0
-    2        1            1                    0                    0
-    3        0            0                    0                    0
-    4        1            1                    0                    0
-    
-       spore-print-color_k  spore-print-color_n  spore-print-color_o  \
-    0                    1                    0                    0
-    1                    0                    1                    0
-    2                    0                    1                    0
-    3                    1                    0                    0
-    4                    0                    1                    0
-    
-       spore-print-color_r  spore-print-color_u  spore-print-color_w  \
-    0                    0                    0                    0
-    1                    0                    0                    0
-    2                    0                    0                    0
-    3                    0                    0                    0
-    4                    0                    0                    0
-    
-       spore-print-color_y
-    0                    0
-    1                    0
-    2                    0
-    3                    0
-    4                    0
-
-%% Cell type:code id: tags:
-
-``` python
-#Create the train/test splits as we did before
-x_train, x_test, y_train, y_test = train_test_split(final.drop(['class_e'], axis=1),final['class_e'],test_size=0.2,random_state=1)
-print("x train/test ",x_train.shape, x_test.shape)
-print("y train/test ",y_train.shape, y_test.shape)
-```
-
-%% Output
-
-    x train/test  (6499, 10) (1625, 10)
-    y train/test  (6499,) (1625,)
-
-%% Cell type:code id: tags:
-
-``` python
-#Convert them from pandas to numpy arrays
-x = x_train.values
-y = y_train.values
-x_t = x_test.values
-y_t = y_test.values
-```
-
-%% Cell type:markdown id: tags:
-
-#### MLP Training and Evaluation
-Let's create an MLP. Currently the only loss function it supports is the Cross-Entropy loss function, which is used by default. By default, it uses the ReLU activation function. It also has a default of 1 hidden layer, containing 100 neurons.
-
-Here are some parameter options you can explore:
-* `activation`: {‘identity’, ‘logistic’, ‘tanh’, ‘relu’}, default=’relu’
-* `hidden_layer_sizes`: tuple, length = n_layers - 2, default=(100,), where the ith element represents the number of neurons in the ith hidden layer.
-* Find out more here: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
-
-%% Cell type:code id: tags:
-
-``` python
-# Default settings are used here; alternatives to try: activation='logistic', hidden_layer_sizes=(1,)
-MLP = MLPClassifier()
-```
-
-%% Cell type:code id: tags:
-
-``` python
-# Train the MLP on the training features (x) and their labels (y).
-MLP.fit(x, y)
-```
-
-%% Output
-
-    MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
-           beta_2=0.999, early_stopping=False, epsilon=1e-08,
-           hidden_layer_sizes=(100,), learning_rate='constant',
-           learning_rate_init=0.001, max_iter=200, momentum=0.9,
-           n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
-           random_state=None, shuffle=True, solver='adam', tol=0.0001,
-           validation_fraction=0.1, verbose=False, warm_start=False)
-
-%% Cell type:code id: tags:
-
-``` python
-predictions = MLP.predict(x_t)
-#Calculate the mean squared error and accuracy
-print("Mean squared error: ",np.mean((predictions - y_t) ** 2))
-print("Accuracy:",str(round(metrics.accuracy_score(y_t, predictions)*100))+"%")
-```
-
-%% Output
-
-    Mean squared error:  0.023384615384615386
-    Accuracy: 98.0%
-
-%% Cell type:markdown id: tags:
-
-Test a mushroom with a broad gill-size and black spore print color, where index = 0 is gill-size and index = 3 is black
-
-%% Cell type:code id: tags:
-
-``` python
-test_mushroom = [1,0,0,0,0,0,0,0,1,0]
-prediction = MLP.predict([test_mushroom])
-```
-
-%% Cell type:code id: tags:
-
-``` python
-if prediction==1:
-    print('Edible')
-else:
-    print('Poisonous')
-```
-
-%% Cell type:markdown id: tags:
-
-### Exercise:
-Have a go at changing some of the learning parameters, e.g. add more layers, or more neurons per layer, or change the activation function, and see if you can improve performance beyond 98%!