From 40fec9be5a049213090a830243af43526bd2025d Mon Sep 17 00:00:00 2001 From: ashepley <ashepley@myune.edu.au> Date: Fri, 10 Jul 2020 09:31:33 +1000 Subject: [PATCH] Upload New File --- ...nd_to_End_Machine_Learning_Topic__12.ipynb | 887 ++++++++++++++++++ 1 file changed, 887 insertions(+) create mode 100644 topic_12/End_to_End_Machine_Learning_Topic__12.ipynb diff --git a/topic_12/End_to_End_Machine_Learning_Topic__12.ipynb b/topic_12/End_to_End_Machine_Learning_Topic__12.ipynb new file mode 100644 index 0000000..cbf32ca --- /dev/null +++ b/topic_12/End_to_End_Machine_Learning_Topic__12.ipynb @@ -0,0 +1,887 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training and Testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataset 1: World Happiness Report up to 2020\n", + "Our goal is to predict the happiness score of a country using both Single and Multiple Linear Regression Models. You can access the World Happiness Report dataset here: www.kaggle.com/mathurinache/world-happiness-report \n", + "\n", + "#### What is Linear Regression?\n", + "Linear regression is a statistical modelling technique used in supervised machine learning. Linear regression is used for predictive analysis, by modelling the relationship between two variables (x and y). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import *\n", + "from sklearn.linear_model import LinearRegression\n", + "import numpy as np\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "def load_data(DATASET_PATH):\n", + " return pd.read_csv(DATASET_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DATASET_PATH = './datasets/happiness/2015.csv'\n", + "\n", + "#create pandas object\n", + "happiness = load_data(DATASET_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Country</th>\n", + " <th>Region</th>\n", + " <th>Happiness Rank</th>\n", + " <th>Happiness Score</th>\n", + " <th>Standard Error</th>\n", + " <th>Economy (GDP per Capita)</th>\n", + " <th>Family</th>\n", + " <th>Health (Life Expectancy)</th>\n", + " <th>Freedom</th>\n", + " <th>Trust (Government Corruption)</th>\n", + " <th>Generosity</th>\n", + " <th>Dystopia Residual</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Switzerland</td>\n", + " <td>Western Europe</td>\n", + " <td>1</td>\n", + " <td>7.587</td>\n", + " <td>0.03411</td>\n", + " <td>1.39651</td>\n", + " <td>1.34951</td>\n", + " <td>0.94143</td>\n", + " <td>0.66557</td>\n", + " <td>0.41978</td>\n", + " <td>0.29678</td>\n", + " <td>2.51738</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>1</th>\n", + " <td>Iceland</td>\n", + " <td>Western Europe</td>\n", + " <td>2</td>\n", + " <td>7.561</td>\n", + " <td>0.04884</td>\n", + " <td>1.30232</td>\n", + " <td>1.40223</td>\n", + " <td>0.94784</td>\n", + " <td>0.62877</td>\n", + " <td>0.14145</td>\n", + " <td>0.43630</td>\n", + " <td>2.70201</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Denmark</td>\n", + " <td>Western Europe</td>\n", + " <td>3</td>\n", + " <td>7.527</td>\n", + " <td>0.03328</td>\n", + " <td>1.32548</td>\n", + " <td>1.36058</td>\n", + " <td>0.87464</td>\n", + " <td>0.64938</td>\n", + " <td>0.48357</td>\n", + " <td>0.34139</td>\n", + " <td>2.49204</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Norway</td>\n", + " <td>Western Europe</td>\n", + " <td>4</td>\n", + " <td>7.522</td>\n", + " <td>0.03880</td>\n", + " <td>1.45900</td>\n", + " <td>1.33095</td>\n", + " <td>0.88521</td>\n", + " <td>0.66973</td>\n", + " <td>0.36503</td>\n", + " <td>0.34699</td>\n", + " <td>2.46531</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Canada</td>\n", + " <td>North America</td>\n", + " <td>5</td>\n", + " <td>7.427</td>\n", + " <td>0.03553</td>\n", + " <td>1.32629</td>\n", + " <td>1.32261</td>\n", + " <td>0.90563</td>\n", + " <td>0.63297</td>\n", + " <td>0.32957</td>\n", + " <td>0.45811</td>\n", + " <td>2.45176</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Country Region Happiness Rank Happiness Score \\\n", + "0 Switzerland Western Europe 1 7.587 \n", + "1 Iceland Western Europe 2 7.561 \n", + "2 Denmark Western Europe 3 7.527 \n", + "3 Norway Western Europe 4 7.522 \n", + "4 Canada North America 5 7.427 \n", + "\n", + " Standard Error Economy (GDP per Capita) Family \\\n", + "0 0.03411 1.39651 1.34951 \n", + "1 0.04884 1.30232 1.40223 \n", + "2 0.03328 1.32548 1.36058 \n", + "3 0.03880 1.45900 1.33095 \n", + "4 0.03553 1.32629 1.32261 \n", + "\n", + " Health (Life Expectancy) Freedom Trust (Government 
Corruption) \\\n", + "0 0.94143 0.66557 0.41978 \n", + "1 0.94784 0.62877 0.14145 \n", + "2 0.87464 0.64938 0.48357 \n", + "3 0.88521 0.66973 0.36503 \n", + "4 0.90563 0.63297 0.32957 \n", + "\n", + " Generosity Dystopia Residual \n", + "0 0.29678 2.51738 \n", + "1 0.43630 2.70201 \n", + "2 0.34139 2.49204 \n", + "3 0.34699 2.46531 \n", + "4 0.45811 2.45176 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "happiness.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Single Linear Regression\n", + "We'll be building a Single Linear Regression model to display the linear relationship between Economy (GDP per Capita) and a country's Happiness Score. This will hopefully allow us to predict a country's Happiness Score based on its Economy.\n", + "Single Linear Regression is a statistical method that models the relationship between two continuous variables. The independent (explanatory) variable, x, is the predictor, and y is the dependent variable being predicted." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Feature Selection" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Happiness Score</th>\n", + " <th>Economy (GDP per Capita)</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>7.587</td>\n", + " <td>1.39651</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>7.561</td>\n", + " <td>1.30232</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>7.527</td>\n", + " <td>1.32548</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>7.522</td>\n", + " <td>1.45900</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>7.427</td>\n", + " <td>1.32629</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Happiness Score Economy (GDP per Capita)\n", + "0 7.587 1.39651\n", + "1 7.561 1.30232\n", + "2 7.527 1.32548\n", + "3 7.522 1.45900\n", + "4 7.427 1.32629" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chosen_columns = ['Happiness Score','Economy (GDP per Capita)']\n", + "economy_happiness = happiness.filter(chosen_columns)\n", + "economy_happiness.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Train/test split" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x 
train/test (126, 1) (32, 1)\n", + "y train/test (126,) (32,)\n" + ] + } + ], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(economy_happiness.drop(['Happiness Score'], axis=1),economy_happiness['Happiness Score'],test_size=0.2,random_state=42) \n", + "print(\"x train/test \",x_train.shape, x_test.shape)\n", + "print(\"y train/test \",y_train.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Convert from pandas to np" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>\n" + ] + } + ], + "source": [ + "print(type(x_train),type(y_train))" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "x = x_train.values\n", + "y=y_train.values" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'numpy.ndarray'> <class 'numpy.ndarray'>\n" + ] + } + ], + "source": [ + "print(type(x), type(y))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create and train the model" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg_model = LinearRegression()\n", + "lin_reg_model.fit(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the coefficient of x is positive, then there exists a positive relationship between x (Economy) and y (the Happiness Score). 
If the coefficient of x is negative, then the relationship between x and y is negative, meaning as x increases, y decreases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lin_reg_model.coef_, lin_reg_model.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Test and evaluate the model" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "#convert the panda dataframe/series into np arrays\n", + "test_x = x_test.values\n", + "test_y = y_test.values\n", + "\n", + "#pass the test x dataset into the model\n", + "predictions = lin_reg_model.predict(test_x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* The mean squared error indicates how close a regression line is to a set of points. \n", + "* The distances between the points and the regression line (error between predicted value and actual value) are squared.\n", + "* Squaring removes any negative signs and emphasises large errors.\n", + "* The mean of all the squared errors is the mean squared error (MSE): a single number summarising the model's error, where a lower value means a better fit." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5080153381136627" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#mean squared error\n", + "np.mean((predictions - test_y) ** 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Visualise the predictions v Actual Happiness Scores**" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XucHFWd9/HPlyTAcB2UeMkAiYCEFRACkUWjyE0iiBCjK7DiBS+ouAvqGhZcXfG2QaM+67oPCILgXQEhIiqRJcKiK2AgSFTIIyKXTMgShEACA4bwe/6o00mnM91TPdPV1+/79erXdFWdrvp19fSvq06dc0oRgZmZdb/NWh2AmZk1hxO+mVmPcMI3M+sRTvhmZj3CCd/MrEc44ZuZ9Qgn/A4maYqkkDQ+Tf9M0ttHsZ5dJK2RNK7xUXaXtL93b3Ucw0mf4a6tjsPalxN+wSTdK2kofRn/V9LFkrYpYlsRcVREfCNnTEeUve7+iNgmItYVEVejSDo7JdwD63jNRu+1SJKul/TuinmHSFrWjO2nz/CeZmxrJJKOk3S7pMclPSzpOklTWh1Xr3PCb47XR8Q2wP7Ay4CPVRZQxp9HFZIEvBV4BKj7LMaaJ50BfRP4J2B74EXAucCzDdyGvy+j4B3WRBExCPwM2BvWHxF+VtKvgCeBXSVtL+kiSQ9KGpT0mVJVi6Rxkr6QjpjuAV5Xvv7KI0xJ75F0p6TVkv4gaX9J3wJ2AX6czjrOGKZqaJKkqyQ9IuluSe8pW+fZki6V9M203t9Lml62/J9T3KslLZV0eOV+kHSQpBXlVUiS3iDpjhq771XAJOB04ARJm1esM+973eSIu/wsQNKBkn4taVX6DP6zcltjIenksjjvkfTesmWHSFom6aPpM75X0lvKll8i6auSrk2vv0HS5LLl66ubUtn/K+knqezNknYrK7tnWs8j6XN6c9myo9M+XJ0+y4+k+TtKujrtm0ck3Vgl6e4H/DkirovM6oj4YUTcn9YzLr3HP6Vt3Cpp57TsFZJ+I+mx9PcVZXHV+33ZPe2jx9L+/MFYP7+OFxF+FPgA7gWOSM93Bn4PfDpNXw/cD+wFjAcmAPOB84GtgecBtwDvTeXfB9yV1vMc4BdAAOPL1vfu9PzvgEGyMwoBuwOTK2NK01Mq1nMD2RHZlmRf3pXA4WnZ2cBTwNHAOGAucFNaNhV4AJhUtt7dquyXPwGvKZu+DDizxn68CLg07aO/ALPLltXzXg8BltX4jA4ADkqfxxTgTuCDZWUD2L1KjOv3f7Xtkf1I75bifDVZ4tq/rOwzwJeALdLyJ4CpafklwGrg4LT8y8Avh4stlX0EODC9l+8A30/Ltk6f08lp2f7Aw8BeafmDwKvS8x3K4psLfDV9BhPIfoQ1zH7YNf2P/B/gUGCbiuVzgCXp/0XAvsBzyf6nHyU7kxsPnJimnzvK78v3gH8hO7DdEnhlq/NBqx8tD6DbHymZrAFWAfeRJdK+tOx64FNlZZ8PPF1anuadCPwiPV8IvK9s2ZFUT/gLgNNrxDRswif7MVkHbFu2fC5wSXp+NvBfZcteAgyl57sDDwFHABNG2C+fAb6enm9LltgmVym7FfA4MCtNnw/8qGx5Pe/1EGok/GFe/0HgyrLpkRL+k+mzLj3WVG6v4jXzS7GzIeFvXbb8UuDj6fklpKSdprdJn9XOlbGlsheWlT0auCs9Px64sSKO84FPpOf3A+8Ftqso8yngR9Xef0XZg1LsK8mS/yWkxA8sBY4b5jVvBW6pmPdr4B2j/L58E7gA2KkR3+VueLhKpzlmRUR/REyOiFMjYqhs2QNlzyeTHbU8mE6bV5F9EZ+Xlk+qKH9fjW3uTHYUXa9JwCMRsbpiOwNl0yvKnj8JbClpfETcTZYgzwYekvR9SZOqbOe7wGxJWwCzgdsiotr7eQNZIvxpmv4OcJSkiWl6tO91E5L2SNUWKyQ9DvwbsGMdqzgtfdb
9EdEPHFOx/qMk3ZSqRFaRJeLy9T8aEU+UTd9H9pmUrP/8I2IN2VF8tX1c+TmVGgtMBv629D+W4ngL8IK0/I0prvtSlcjL0/x5wN3Az1N11JnVdkJE3BQRb46IiWRnAgeTHW1D9c9rEpv+T1f+79XzfTmD7AzillT1+M5q8fYKJ/zWKx+u9AGyI5Ydy5LGdhGxV1r+INmXpWSXGut9gKzqYKRtVloOPEfSthXbGazxmg0rjvhuRLyS7MsYwOeqlPsD2Zf5KODvyX4Aqnk7WbK6X9IKsuqfCWRHc1Dfe32C7IwByOqTgYlly88jqzZ7cURsB3yULGmMWfpx+yHwBeD56QfhpxXr30HS1mXTu5B9JiXrP39lrb2eU7E8jweAG8p/mCJr4fN+gIj4TUQcR5Y455MdqRNZXfw/RcSuwOuBDw93jaZSRPwGuIJ07Yrqn9dysv+bcpX/e7m/LxGxIiLeExGTyM5YzlWbNqltFif8NhIRDwI/B74oaTtJm0naTdKrU5FLgdMk7SRpB6DqERZwIfARSQcos3vZBb7/JatnHS6GB4D/AeZK2lLSS4F3kR1V1yRpqqTDUmJ7Chgiq3Ko5rvAaWRHf5dVWecAcDjZkfJ+6bEv2Q9JqbVOPe/1/5GdkbxO0gSyFlNblC3flqz6aI2kPYH3j/S+67B52tZK4BlJR5FVy1X6pKTNJb2K7H2X75ujJb1S2YXkTwM3p8+sHlcDe0h6q6QJ6fEySX+TtvsWSdtHxFqyfbEOQNIxad+qbP4mn2+K7z2Snpem9wSOBW5KRS4EPi3pxenzeqmk55L9+O0h6e8ljZd0PFmV4dXDvYmRvi+S/k7STqn4o2Q/Fm3d9LhoTvjt521kieEPZP+klwMvTMu+RlZf/VvgNrKjpmFFxGXAZ8mS6mqyI7XnpMVzgY+l0+CPDPPyE8nq9ZcDV5LV7V6bI/YtgHPILgCuIDtC/GiN8t8jq7deGBEPVynzVuD2iPh5OmJbERErgP8AXipp73rea0Q8BpxKlnQGyY74y1vtfITsjGM12f5uWMuOVE12GtkP96NpO1dVFFuRli0n+5F9X0TcVbb8u8AnyKpyDiCrihlNHEcCJ6TtrCD7AS398L0VuDdVab0POCnNfzHwX2TXJX4NnBsR1w+ziVVkCX6JpDXANWT/R59Py79Etg9+TvbDcRFZPfxfyH7g/onswvwZwDE1/jeg9vflZcDNKYaryK6V/HmE3dPVFOEboJi1A0mHAN+OiJ2qLL+E7ALwJv04zPLwEb6ZWY9wwjcz6xGu0jEz6xE+wjcz6xHjWx1AuR133DGmTJnS6jDMzDrGrbfe+nDq4Daitkr4U6ZMYdGiRa0Ow8ysY0iq1eN+I67SMTPrEU74ZmY9wgnfzKxHOOGbmfUIJ3wzsx7hhG9m1iPaqlmmmVm7mr94kHkLlrJ81RCT+vuYM3Mqs6YNjPzCNuKEb2Y2gvmLBznriiUMrc2G0x9cNcRZVywB6Kik7yodM7MRzFuwdH2yLxlau455C5a2KKLRccI3MxvB8lVDdc1vV074ZmYjmNTfN+z8/q0mNDmSsXHCNzMbwZyZU5kwbtN72a956hnmLx4c5hXtyQnfzGwEs6YNsPXmm7ZxWftsdFQ9vhO+mVkOjw2tHXZ+J9XjO+GbmeVQrR6/2vx25IRvZpbDnJlT6ZswbqN5fRPGMWfm1BZFVD93vDIzy6HUwaqTe9s64ZuZ5TRr2kBHJfhKhVXpSJoq6fayx+OSPljU9szMrLbCjvAjYimwH4CkccAgcGVR2zOz+nXDgGCWX7OqdA4H/hQRuW+2a2bF6pYBwSy/ZiX8E4DvNWlbZpZDrQHBej3hd+uZT+HNMiVtDhwLXFZl+SmSFklatHLlyqLDMbOkWwYEa7TSmc/gqiGCDWc+nTSEQjXNaId/FHBbRPzvcAsj4oKImB4R0ydOnNiEcMwMuqMjURGaNRTy/MWDzDhnIS868yfMOGdhU35QmpHwT8TVOWZ
tpxs6EhWhGWc+rTqLKDThS9oKeA1wRZHbMbP6zZo2wNzZ+zDQ34eAgf4+5s7epyvqqseiGWc+rbqhSqEXbSPiSeC5RW7DzEav0zsSFWHOzKkbtV6Cxp/5tOr6icfSMTMr04wzn1ZdP/HQCmY2Jt3YhLHoM59mnEUMxwnfzEbNnbdGp1UDsTnhm9moufPW6LXi+onr8M1s1Nx5q7M44ZvZqLnzVmdxwjezUXPnrc7iOnwzG7VuuAtUL3HCN7MxceetzuEqHTOzHuGEb2bWI1ylY2aj0o09bLudE76Z1c09bDuTq3TMrG6tGt7XxsZH+GZWt2b3sHX1UWP4CN/M6latJ+1mUsPv2tTN95htNid8M6vbcD1sAdZFNDwZu/qocZzwzaxupZuEjJM2WdboZOwB2hrHCd/MRmXWtAGejRh2WSOTsQdoaxwnfDMbtWYkYw/Q1jgjJnxJMyRtnZ6fJOlLkiYXH5qZtbtmJONm3GO2V+RplnkesK+kfYEzgIuAbwKvLjIwM2t/zRot0wO0NUaehP9MRISk44AvR8RFkt5edGBm1hmcjDtHnoS/WtJZwFuBV0kaB0woNiwzM2u0PBdtjweeBt4ZESuAAWBeoVGZmVnDjZjwU5L/IbBFmvUwcGWRQZmZWePlaaXzHuBy4Pw0awCYX2RQZmbWeHmqdD4AzAAeB4iIPwLPKzIoMzNrvDwJ/+mI+GtpQtJ4YPjudWZm1rbyJPwbJH0U6JP0GuAy4MfFhmVmZo2WJ+GfCawElgDvBX4KfKzIoMzMrPFqtsNPbe6/EREnAV9rTkhmZlaEmkf4EbEOmChp8ybFY2ZmBcnT0/Ze4FeSrgKeKM2MiC8VFZSZmTVenoS/PD02A7YtNhwzMyvKiAk/Ij4JIGnbbDLWFB6VmZk1XJ6etntLWgz8Dvi9pFsl7ZVn5ZL6JV0u6S5Jd0p6+VgDNjOz0clTpXMB8OGI+AWApEPIWuy8IsdrvwxcExFvShd+txptoGZmjTJ/8WDhY/i3ozwJf+tSsgeIiOtLd8CqRdJ2wMHAO9Lr/gr8tdZrzMyKNn/xIGddsYShtesAGFw1xFlXLAHo+qSfp+PVPZI+LmlKenwM+HOO1+1K1mHrYkmLJV043A+FpFMkLZK0aOXKlXWGb2ZWn3kLlq5P9iVDa9cxb8HSFkXUPHkS/juBicAV6bEjcHKO140H9gfOi4hpZE06z6wsFBEXRMT0iJg+ceLE3IGbmY3G8lVDdc3vJnla6TwKnDaKdS8DlkXEzWn6coZJ+Gb16tX6V2uMSf19DA6T3Cf197UgmubK00rnWkn9ZdM7SFow0uvSjVMekFS6ff3hwB9GHakZG+pfB1cNEWyof52/eLDVoVmHmDNzKn0Txm00r2/COObMnFrlFd0jT5XOjhGxqjSRjvjzjof/j8B3JN0B7Af8W/0hmm3Qy/Wv1hizpg0wd/Y+DPT3IWCgv4+5s/fpibPEPK10npW0S0TcDyBpMjnHw4+I24HpY4jPbCO9XP9qjTNr2kBPJPhKeRL+vwC/lHRDmj4YOKW4kMyq699qAo8+uXbY+WZWW56LttdI2h84KM36UEQ8XGxYZsOLKueW1eab2QZV6/AlTZa0PUBK8E8ArwHe5uGSrVUeG9r06L7WfDPboNZF20uBrQEk7Ud2a8P7gX2Bc4sPzWxT1ZrO9UKTOrOxqpXw+yJieXp+EvD1iPgiWaerAwuPzGwYvdykzmysatXhq+z5YcBZABHxrKThX2FWsFLLCne8MnAnvHrVSvgLJV0KPAjsACwEkPRCPAiatVCvNqmzjfXyIGijVatK54NkY+fcC7wyIkpXxV5A1lTTzKxl3AmvflWP8CMigO8PM39xoRGZmeXgTnj1yzO0gplZ23GLrfo54ZtZR3KLrfrlGVphPUk7ADtHxB0FxWNmlotbbNVvxIQv6Xrg2FT2dmClpBsi4sMFx2ZmDdSNTRjdYqs+eap0to+Ix4HZwMURcQBwRLFhmVkj+T4
CBvkS/vjU9v7NwNUFx2NmBXATRoN8Cf9TwALg7oj4jaRdgT8WG5aZNZKbMBrkGx75MrKB00rT9wBvLDIoM2usXr6Pq22Q5562n5e0naQJkq6T9LCkk5oRnJk1hpswGuSr0jkyXbQ9BlgG7AHMKTQqM2uoXr6Pq22Qpx1+6d5xRwPfi4hHPFqmWedxE0bLk/B/LOkuYAg4VdJE4KliwzIzs0bLc9H2TEmfAx6PiHWSngSOKz40M+sE3dihq1vluWi7FfAB4Lw0axIwvcigzKwzuENXZ8lz0fZishuevCJNLwM+U1hEZtYx3KGrs+RJ+LtFxOeBtQARMcTGtz80sx7lDl2dJU/C/6ukPiAAJO0GPF1oVGbWETwmfWfJk/A/AVwD7CzpO8B1wBmFRmVmHcEdujpLnlY610q6DTiIrCrn9Ih4uPDIzKzteUz6zpL3BihbAo+m8i+RRET8d3FhmdlI2qU5pDt0dY48N0D5HHA88Hvg2TQ7ACd8sxYpNYcstZApNYcEnHytqjxH+LOAqRHhC7VmbaJWc0gnfKsmz0Xbe9gwno6ZtQE3h7TRyHOE/yRwu6TrKGuOGRGnFRaVmdXk8e1tNPIk/KvSw8zaxJyZUzeqwwc3h7SR5WmW+Y1mBNJt2qUFhXUnN4e00aia8CVdGhFvlrSE1Mu2XES8tNDIOphbUFgzuDmk1avWEf7p6e8xo125pHuB1cA64JmI6IlRNt2CwszaUdWEHxEPpr/3SXoBcCDZkf5vImJFHds4tNd65roFhZm1ozzj4b8buAWYDbwJuEnSO4sOrBnmLx5kxjkLedGZP2HGOQsbNoa3B5Qys3aUp5XOHGBaRPwFQNJzgf8Bvp7jtQH8XFIA50fEBZUFJJ0CnAKwyy675I17zIqsZ3cLCsvLF/etmfJ0vFpGVg9fshp4IOf6Z0TE/sBRwAckHVxZICIuiIjpETF94sSJOVc7dkXeuGHWtAHmzt6Hgf4+BAz09zF39j7+IttGfLcoa7Y8R/iDwM2SfkR2xH4ccIukDwNExJeqvTAilqe/D0m6kuw6QFuMwVN0PXu7t6DwkWXr+eK+NVuehP+n9Cj5Ufq7ba0XSdoa2CwiVqfnRwKfGlWUBejlnoqd3Gy0m36ofHHfmi1Px6tPAkjaLpuM1SO8pOT5wJWSStv5bkRcM9pAG62X69k79ciyk3+ohtPLBx3WGnla6UxPna/uAJZI+q2kA0Z6XUTcExH7psdeEfHZRgTcKL1cz96pR5bddsNs3y3Kmi1Plc7XgVMj4kYASa8ELgY6vqdtu9ezF6VTjyw79YeqGg+PYM2WJ+GvLiV7gIj4paS81TrWhjq1OqtTf6hq6dWDDmuNPM0yb5F0vqRDJL1a0rnA9ZL2l7R/0QFa43VqdZarQMzGRhGbjIu2cQHpFzUWR0Qc1qhgpk+fHosWLWrU6qwLdVMrHbNGkHRr3nHK8rTSOXTsIZk1hqtAzEYvTx0+kl4H7AVsWZoXEW3Tpt7MGstnUt1pxIQv6avAVsChwIVkA6jdUnBcZtYi3dbfwTbIc9H2FRHxNuDR1Anr5cDOxYZlZq3Sbf0dbIM8Cb/UDu5JSZOAtcCLigvJzFqp2/o72AZ5Ev7VkvqBecBtwL3A94sMysxax/dz6F4jJvyI+HRErIqIHwKTgT0j4uPFh2ZmreD+Dt2r1k3MZ9dYRkRcUUxIZtZKHvKhe9VqpfP6iuc/LpsOwAnfrEu5v0N3qnUT85NLzyUtLp+29uX202ZWTa6OV2RH9Nbm3H7azGrJ00rHOoTbT5tZLbUu2v6YDUf2u0q6qnx5RBxbZGBWP7efNrNaalXpfKHs+ReLDsTGrhvHizezxql10faGZgZiY9epNzYxs+bIe9HWOoDbT5tZLU74Xabo9tNu9mnWuZzwLTc3+zTrbHlb6WzCrXSK125H07WafTrhm7W/vK10rMna8WjazT7NOptb6bSpdjyadrNPs842Yk9bSS+WdLmkP0i6p/RoRnC
9rB2Ppj1srllnyzO0wsXAecAzZPe1/SbwrSKDsva8CcWsaQPMnb0PA/19CBjo72Pu7H1cf2/WIfK00umLiOskKSLuA86WdCPwiYJj62nt2onKw+a2RrtdwLfOlCfhPyVpM+CPkv4BGASeV2xY5k5UVtKOF/CtMymi9sjHkl4G3An0A58Gtgc+HxE3NTqY6dOnx6JFixq9WrOONuOchcNeLB/o7+NXZx7WgoisnUi6NSKm5yk74hF+RPwmPV0D+CYoZk3WjhfwrTONmPAl/YJhOmBFhA8tzJrAzWGtUfLU4X+k7PmWwBvJWuyYWRO06wV86zx5qnRurZj1K0nulGXWJL6Ab42Sp0rnOWWTmwEHAC8oLCIz24Sbw1oj5KnSuZWsDl9kVTl/Bt5VZFBmZtZ4eRL+30TEU+UzJG2RdwOSxgGLgMGIOKbO+MzMrEHyDK3wP8PM+3Ud2zidrB2/mZm1UK3x8F8ADAB9kqaRVekAbAdslWflknYCXgd8Fvjw2ELtXu42b2bNUKtKZybwDmAn4ItsSPiPAx/Nuf5/B84Atq1WQNIpwCkAu+yyS87VdqbhEjvgbvNm1hR5hlZ4Y0T8sO4VS8cAR0fEqZIOAT4yUh1+Nw+tUDkeCmRtqbecsBmPPrl2k/LuNm9medQztEKeOvwDJPWXrXwHSZ/J8boZwLGS7gW+Dxwm6dt5gupG1W5oMlyyB3ebN7PGy5Pwj4qIVaWJiHgUOHqkF0XEWRGxU0RMAU4AFkbESaOOtMPVm8Ddbd7MGi1Pwh9X3gxTUh+Qu1mmZaol8P6+Cb6LlJk1RZ6E/23gOknvkvRO4Fqyu17lFhHX93ob/Gq3Bzz72L18Fykza4o8Y+l8XtIdwBFkLXU+HRELCo+sy4w0HooTvJkVbcRWOpu8QJoB/H1EfKDRwXRzK51GcHt9M6vU0BugpBXuB5wIHE82ls4Vow/PRsO3uTOzsapahy9pD0n/KulO4D+BZWRnBIdGxFeaFqEB1Zt1zluwtEURmVmnqXWEfxdwI/D6iLgbQNKHmhKVbcK3uTOzsaqV8N9I1n7+F5KuIes8pRrlrYax1r/7NndmNlZVq3Qi4sqIOB7YE7ge+BDwfEnnSTqySfF1hVL9++CqIYIN9e/zFw/mXke1Zp1ur29meY3YDj8inoiI76R29DsBtwNnFh5ZF2lE/fusaQNt215//uJBZpyzkBed+RNmnLOwrh8yM2ueXK10SiLiEeD89LCcGlX/3o63uXPrIbPOkaenrY1RtXr2bqh/d+shs87hhN8E3Vz/7tZDZp3DCb8J2rn+fay6+ezFrNvUVYdvo9eO9e+NMGfm1GFv7NINZy9m3cYJ38ZkpEHhzKx9OOHbmHXr2YtZt3EdvplZj3DCNzPrEU74ZmY9wgnfzKxHOOGbmfUIJ3wzsx7hhG9m1iOc8M3MeoQTvplZj3DCNzPrEU74ZmY9wgnfzKxHOOGbmfUIJ3wzsx7hhG9m1iOc8M3MeoQTvplZj3DCNzPrEU74ZmY9wgnfzKxHOOGbmfWIwhK+pC0l3SLpt5J+L+mTRW3LzMxGNr7AdT8NHBYRayRNAH4p6WcRcVOB2zQzsyoKS/gREcCaNDkhPaKo7ZmZWW2F1uFLGifpduAh4NqIuHmYMqdIWiRp0cqVK4sMx8yspxVZpUNErAP2k9QPXClp74j4XUWZC4ALAKZPnz6qM4D5iweZt2Apy1cNMam/jzkzpzJr2sCY4zcz6yZNaaUTEauA64HXNnrd8xcPctYVSxhcNUQAg6uGOOuKJcxfPNjoTZmZdbQiW+lMTEf2SOoDjgDuavR25i1YytDadRvNG1q7jnkLljZ6U2ZmHa3IKp0XAt+QNI7sh+XSiLi60RtZvmqorvlmZr2qyFY6dwDTilp/yaT+PgaHSe6T+vuK3rSZWUfp+J62c2ZOpW/CuI3m9U0Yx5yZU1sUkZlZeyq0lU4zlFrjuJWOmVltHZ/wIUv6TvBmZrV1fJWOmZnl44R
vZtYjnPDNzHqEE76ZWY9wwjcz6xFO+GZmPULZsPXtQdJK4L4GrGpH4OEGrKdVHH9rOf7Wcvz1mRwRE/MUbKuE3yiSFkXE9FbHMVqOv7Ucf2s5/uK4SsfMrEc44ZuZ9YhuTfgXtDqAMXL8reX4W8vxF6Qr6/DNzGxT3XqEb2ZmFZzwzcx6RMcmfElbSrpF0m8l/V7SJ4cp8w5JKyXdnh7vbkWs1UgaJ2mxpE1u/ShpC0k/kHS3pJslTWl+hLWNEH9b73sASfdKWpLiWzTMckn6j/QZ3CFp/1bEOZwcsR8i6bGy/f+vrYizGkn9ki6XdJekOyW9vGJ52+57yBV/W+7/Th4P/2ngsIhYI2kC8EtJP4uImyrK/SAi/qEF8eVxOnAnsN0wy94FPBoRu0s6AfgccHwzg8uhVvzQ3vu+5NCIqNZJ5ijgxenxt8B56W+7qBU7wI0RcUzToqnPl4FrIuJNkjYHtqpY3u77fqT4oQ33f8ce4UdmTZqckB4dcwVa0k7A64ALqxQ5DvhGen45cLgkNSO2PHLE3w2OA76Z/tduAvolvbDVQXU6SdsBBwMXAUTEXyNiVUWxtt33OeNvSx2b8GF9lcLtwEPAtRFx8zDF3phOCS+XtHOTQ6zl34EzgGerLB8AHgCIiGeAx4DnNie0XEaKH9p335cE8HNJt0o6ZZjl6z+DZFma1w5Gih3g5anK82eS9mpmcCPYFVgJXJyqBC+UtHVFmXbe93nihzbc/x2d8CNiXUTsB+wEHChp74oiPwamRMRLgf9iwxFzS0k6BngoIm6tVWyYeW1xBpMz/rbc9xVmRMT+ZNUHH5B0cMXytv0MGDn228jGWNkX+Aowv9kB1jAe2B84LyKmAU8AZ1aUaed9nyf+ttz/HZ3wS9Lp1PXAayvm/yUink6TXwMOaHJo1cwAjpV0L/B94DBJ364oswzYGUDSeGB74JFmBlnDiPG38b5fLyKWp78PAVcCB1YUWf8ZJDsBy5sTXW0jxR4Rj5eqPCPip8AESTs2PdDhLQOWlZ2RX06WQCvLtOW+J0f87br/OzbhS5ooqT897wOOAO6qKFNe53cs2QXGlouIsyJip4iYApwALIyIkyqKXQW8PT1/UyokfUpSAAADhElEQVTTFkc4eeJv131fImlrSduWngNHAr+rKHYV8LbUYuQg4LGIeLDJoW4iT+ySXlC65iPpQLLv+l+aHetwImIF8ICkqWnW4cAfKoq15b6HfPG36/7v5FY6LwS+IWkc2c68NCKulvQpYFFEXAWcJulY4Bmyo+N3tCzaHCpivwj4lqS7yWI/oaXB5dBh+/75wJXpOzke+G5EXCPpfQAR8VXgp8DRwN3Ak8DJLYq1Up7Y3wS8X9IzwBBwQrscMCT/CHwntXC5Bzi5Q/Z9yUjxt+X+99AKZmY9omOrdMzMrD5O+GZmPcIJ38ysRzjhm5n1CCd8M7Me4YRvHUvSujQS4e8kXSZpuAGs8q7rEKVRPyUdK6my52R52X5Jp5ZNT5J0+Wi3bdYsTvjWyYYiYr+I2Bv4K/C+8oWp007d/+MRcVVEnFOjSD9waln55RHxpnq3Y9ZsTvjWLW4Edpc0JY1Pfi7ZeCY7SzpS0q8l3ZbOBLYBkPRaZeOZ/xKYXVqRsrH8/zM9f76kK9MgWL+V9ArgHGC3dHYxL23zd6n8lpIuVjZW/WJJh5at8wpJ10j6o6TPp/njJF2SzlKWSPpQM3ea9ZZO7mlrBqwfa+go4Jo0aypwckScmsYv+RhwREQ8IemfgQ+nhPs14DCy3pw/qLL6/wBuiIg3pF7d25ANlLV3GrgPbXxzmg8ARMQ+kvYkG9Fyj7RsP2Aa2b0clkr6CvA8YCCdpVAaLsSsCD7Ct07Wp2x47EXA/aTxyYH7ym6EcxDwEuBXqezbgcnAnsCfI+KPqct75eB1JYeR3XyjNDrrYyPE9ErgW6n8XcB9QCnhXxcRj0XEU2Rjr0wm65a/q6SvSHot8Hj+t29WHx/hWyc
bKh1ll6TxZZ4on0V2r4QTK8rtRzHD7da6Sc3TZc/XAeMj4lFJ+wIzyc4O3gy8s4C4zHyEb13vJmCGpN0BJG2VqljuAl4kabdU7sQqr78OeH967ThldztaDWxbpfx/A29J5fcAdgGWVgsuVTltFhE/BD7OpsMEmzWME751tYhYSTZS5/ck3UH2A7BnqlY5BfhJumh7X5VVnA4cKmkJcCuwV0T8hayK6HeS5lWUPxcYl8r/AHhH2X0BhjMAXJ+qmy4BzhrN+zTLw6Nlmpn1CB/hm5n1CCd8M7Me4YRvZtYjnPDNzHqEE76ZWY9wwjcz6xFO+GZmPeL/A0otwF5HMS37AAAAAElFTkSuQmCC\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.scatter(lin_reg_model.predict(test_x), test_y)\n", + "plt.title('Predictions v Actual Happiness Scores')\n", + "plt.xlabel('Predictions')\n", + "plt.ylabel('Actual Happiness Scores');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multiple Linear Regression\n", + "Multiple linear regression is a statistical modelling technique that uses several predictors, or explanatory variables to predict the dependent variable (y). Multiple Linear Regression can provide better prediction reliability, and lower error because the combination of independent variables can lead to a higher probabliity of a given outcome. For example, if a country has a high GDP per capita (wealthy) it is more likely that people will be healthy, with a strong family unit, meaning they will be happiness overall. Therefore we would expect our Multiple Linear Regression model to outperform our Single Linear Regression model, which relies solely on Economy to predict happiness. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Feature Selection**" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Happiness Score</th>\n", + " <th>Family</th>\n", + " <th>Health (Life Expectancy)</th>\n", + " <th>Economy (GDP per Capita)</th>\n", + " <th>Freedom</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>7.587</td>\n", + " <td>1.34951</td>\n", + " <td>0.94143</td>\n", + " <td>1.39651</td>\n", + " <td>0.66557</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>7.561</td>\n", + " <td>1.40223</td>\n", + " <td>0.94784</td>\n", + " <td>1.30232</td>\n", + " <td>0.62877</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>7.527</td>\n", + " <td>1.36058</td>\n", + " <td>0.87464</td>\n", + " <td>1.32548</td>\n", + " <td>0.64938</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>7.522</td>\n", + " <td>1.33095</td>\n", + " <td>0.88521</td>\n", + " <td>1.45900</td>\n", + " <td>0.66973</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>7.427</td>\n", + " <td>1.32261</td>\n", + " <td>0.90563</td>\n", + " <td>1.32629</td>\n", + " <td>0.63297</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Happiness Score Family Health (Life Expectancy) \\\n", + "0 7.587 1.34951 0.94143 \n", + "1 7.561 1.40223 0.94784 \n", + "2 7.527 1.36058 0.87464 \n", + "3 7.522 1.33095 0.88521 \n", + "4 7.427 1.32261 0.90563 \n", + "\n", + 
" Economy (GDP per Capita) Freedom \n", + "0 1.39651 0.66557 \n", + "1 1.30232 0.62877 \n", + "2 1.32548 0.64938 \n", + "3 1.45900 0.66973 \n", + "4 1.32629 0.63297 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chosen_features = ['Happiness Score','Family', 'Health (Life Expectancy)', 'Economy (GDP per Capita)', 'Freedom']\n", + "multiple_happiness = happiness.filter(chosen_features)\n", + "multiple_happiness.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Create the train/test splits**" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x train/test (126, 4) (32, 4)\n", + "y train/test (126,) (32,)\n" + ] + } + ], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(multiple_happiness.drop(['Happiness Score'], axis=1),multiple_happiness['Happiness Score'],test_size=0.2,random_state=42) \n", + "print(\"x train/test \",x_train.shape, x_test.shape)\n", + "print(\"y train/test \",y_train.shape, y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Prepare the training sets**" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "#convert from pandas dataframe/series to np array for training\n", + "x = x_train.values\n", + "y=y_train.values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Create and train a linear regression model**" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg_model = LinearRegression()\n", + 
"lin_reg_model.fit(x, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([1.20973375, 1.06133406, 0.91949607, 1.74411857]), 1.960421057319616)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lin_reg_model.coef_,lin_reg_model.intercept_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Let's test it!**" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "#convert the panda dataframe/series into np arrays\n", + "test_x = x_test.values\n", + "test_y = y_test.values\n", + "\n", + "#pass the test x dataset into the model\n", + "predictions = lin_reg_model.predict(test_x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Let's Evaluate it!**" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.2320463982294615" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#mean squared error\n", + "np.mean((predictions - test_y) ** 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.70905 0.48246 0.27108 0.44017] 4.3069999999999995 4.347199667982519\n", + "[1.25712 0.99111 1.27074 0.49615] 5.987 6.566885214446166\n", + "[0.747 0.61712 0.8818 0.17288] 4.194 4.6313974975723315\n", + "[0.41587 0.22396 0.0153 0.1185 ] 2.905 2.921955747845097\n", + "[0.95152 0.43873 0.18847 0.46582] 5.0569999999999995 4.56288874190275\n", + "[1.24823 0.78723 1.05351 0.44974] 6.574 6.0590592182554275\n", + "[1.04103 0.07612 0.37545 0.31767] 4.898 4.199857874493746\n", + "[0.73803 0.54909 0.59066 0.59591] 4.876 5.018456020765323\n", + "[0.91916 0.79081 0.83223 0.09245] 4.949 
4.838149496430775\n", + "[1.13299 0.33861 0.21102 0.45727] 3.931 4.681980781245059\n", + "[1.12575 0.80925 1.42727 0.64157] 6.901 6.612506721311087\n", + "[1.14184 0.74314 0.59325 0.55475] 5.827999999999999 5.643504054954601\n", + "[1.23287 0.69702 0.98124 0.49049] 6.983 5.94935561254865\n", + "[1.22668 0.53886 0.95847 0.4761 ] 5.547999999999999 5.72797197341618\n", + "[1.1985 0.79661 1.06353 0.5421 ] 6.7860000000000005 6.179154614012047\n", + "[1.20643 0.84483 1.17898 0.46364] 6.505 6.209237609834251\n", + "[0.95571 0. 0.33024 0.4084 ] 4.507 4.132528102453181\n", + "[0.54447 0.69805 1.0088 0.30033] 4.686 4.8113477999285355\n", + "[1.02626 0.09131 0.08308 0.34037] 4.971 3.968870196574679\n", + "[1.07008 0.92356 1.20806 0.49027] 5.695 6.201034067408417\n", + "[0.77115 0.15185 0.46534 0.46866] 3.655 4.299747723696424\n", + "[1.28566 0.89667 1.30782 0.5845 ] 6.937 6.689366416355663\n", + "[1.29704 0.89042 1.33723 0.62433] 7.2 6.793010470490197\n", + "[1.30923 0.93156 1.33358 0.65124] 7.284 6.894998478232223\n", + "[1.2089 0.8116 1.06166 0.60362] 6.485 6.313223959197274\n", + "[1.05392 0.69639 0.90198 0.40661] 5.192 5.513029193967952\n", + "[1.01528 0.61826 0.59448 0.32818] 5.888999999999999 4.963826790421297\n", + "[0.79273 0.36315 0.23906 0.22917] 3.9560000000000004 3.9243511389537744\n", + "[0.59207 0.36291 0.44025 0.46074] 4.369 4.270230196212314\n", + "[0.66801 0.46721 0.20824 0.19184] 3.681 3.7904687536157557\n", + "[1.00268 0.38215 0.2852 0.32878] 3.781 4.4146572856324005\n", + "[0.94632 0.73172 1.06098 0.22815] 5.332000000000001 5.255303251064371\n" + ] + } + ], + "source": [ + "#see the results\n", + "for each, actual_happiness, preds in zip(test_x, test_y, predictions):\n", + " print(each, actual_happiness, preds)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XuYHGWZ9/HvjyTCcByUeMgAiYCEXUAIRFaNIieJIAJGV2BFBQ+ouAuua1hwdT1v0Ky+67ovCKKAZwEhIiqHBWHRFTAQJCrkFZFDJkSCEAgQEeL9/lFPJ51murt60tWn+n2uq6/p7qquuqd65q6n7nrqKUUEZmY2+DbqdgBmZtYZTvhmZiXhhG9mVhJO+GZmJeGEb2ZWEk74ZmYl4YTfxyRNkxSSJqbXP5b0tnEsZ3tJj0ma0P4oB0va3jt1O46xpO9wh27HYb3LCb9gku6WtDr9M/5B0rmSNi9iXRFxSEScnzOmg6o+d29EbB4Ra4qIq10kfSwl3H1a+Mx6v2uRJF0r6Z017+0naWkn1p++w7s6sa5mJB0h6VZJj0p6UNLVkqZ1O66yc8LvjNdFxObAXsBLgA/XzqCMv486JAl4C/AQ0PJRjHVOOgL6GvBPwFbAC4EzgL+0cR3+fxkHb7AOiohR4MfAbrC2RfhpST8DngB2kLSVpK9Iul/SqKRPVUotkiZI+vfUYroLeG318mtbmJLeJel2Sask/UbSXpK+DmwP/CAddZwyRmloiqRLJT0k6U5J76pa5sckXSDpa2m5v5Y0s2r6P6e4V0laIunA2u0g6aWSlleXkCS9XtJtDTbfK4EpwMnA0ZKeVbPMvL/rM1rc1UcBkvaR9HNJK9N38F+169oQko6vivMuSe+umrafpKWSPpS+47slvblq+nmSviTpqvT56yRNrZq+ttyU5v2/kn6Y5r1R0o5V8+6SlvNQ+p7eVDXt0LQNV6Xv8oPp/W0kXZa2zUOSrq+TdPcEfh8RV0dmVUR8LyLuTcuZkH7H36V13CxpuzTt5ZJ+IemR9PPlVXG1+v+yU9pGj6Tt+d0N/f76XkT4UeADuBs4KD3fDvg18Mn0+lrgXmBXYCIwCVgAnAVsBjwXuAl4d5r/PcAdaTnPBn4CBDCxannvTM//FhglO6IQsBMwtTam9HpazXKuI2uRbUL2z7sCODBN+xjwJ+BQYAIwD7ghTZsO3AdMqVrujnW2y++AV1e9vhA4tcF2/ApwQdpGfwTmVE1r5XfdD1ja4DvaG3hp+j6mAbcD76+aN4Cd6sS4dvvXWx/ZTnrHFOeryBLXXlXzPg18Htg4TX8cmJ6mnwesAvZN078A/HSs2NK8DwH7pN/lm8B30rTN0vd0fJq2F/AgsGuafj/wyvR866r45gFfSt/BJLKdsMbYDjukv5H/A+wPbF4zfS6wOP29CNgDeA7Z3/TDZEdyE4Fj0uvnjPP/5dvAv5A1bDcBXtHtfNDtR9cDGPRHSiaPASuBe8gS6VCadi3wiap5nwc8WZme3jsG+El6fg3wnqppB1M/4V8BnNwgpjETPtnOZA2wRdX0ecB56fnHgP+umvbXwOr0fCfgAeAgYFKT7fIp4Kvp+RZkiW1qnXk3BR4FjkyvzwK+XzW9ld91Pxok/DE+/37gkqrXzRL+E+m7rjweq11fzWcWVGJnXcLfrGr6BcBH0vPzSEk7vd48fVfb1caW5j2nat5DgTvS86OA62viOAv4aHp+L/BuYMuaeT4BfL/e718z70tT7CvIkv95pMQPLAGOGOMzbwFuqnnv58Bx4/x/+RpwNrBtO/6XB+Hhkk5nHBkRwxExNSJOjIjVVdPuq3o+lazVcn86bF5J9o/43DR9Ss389zRY53ZkrehWTQEeiohVNesZqXq9vOr5E8AmkiZGxJ1kCfJjwAOSviNpSp31fAuYI2ljYA5wS0TU+31eT5YIf5RefxM4RNLk9Hq8v+szSNo5lS2WS3oU+DdgmxY
WcVL6rocjYhg4rGb5h0i6IZVEVpIl4urlPxwRj1e9vofsO6lY+/1HxGNkrfh627j2e6p0FpgK/E3lbyzF8Wbg+Wn6G1Jc96SSyMvS+/OBO4ErUznq1HobISJuiIg3RcRksiOBfcla21D/+5rCM/+ma//2Wvl/OYXsCOKmVHp8e714y8IJv/uqhyu9j6zFsk1V0tgyInZN0+8n+2ep2L7Bcu8jKx00W2etZcCzJW1Rs57RBp9Zt+CIb0XEK8j+GQP4TJ35fkP2z3wI8HdkO4B63kaWrO6VtJys/DOJrDUHrf2uj5MdMQBZPRmYXDX9TLKy2YsiYkvgQ2RJY4Olndv3gH8Hnpd2CD+qWf7Wkjarer092XdSsfb7V9bb69k10/O4D7iuescUWQ+f9wJExC8i4giyxLmArKVOZLX4f4qIHYDXAR8Y6xxNrYj4BXAx6dwV9b+vZWR/N9Vq//Zy/79ExPKIeFdETCE7YjlDPdqltlOc8HtIRNwPXAl8TtKWkjaStKOkV6VZLgBOkrStpK2Bui0s4Bzgg5L2VmanqhN8fyCrs44Vw33A/wLzJG0i6cXAO8ha1Q1Jmi7pgJTY/gSsJis51PMt4CSy1t+FdZY5AhxI1lLeMz32INuRVHrrtPK7/j+yI5LXSppE1mNq46rpW5CVjx6TtAvw3ma/dwuelda1Anha0iFkZblaH5f0LEmvJPu9q7fNoZJeoexE8ieBG9N31orLgJ0lvUXSpPR4iaS/Sut9s6StIuIpsm2xBkDSYWnbqur9Z3y/Kb53SXpuer0LcDhwQ5rlHOCTkl6Uvq8XS3oO2c5vZ0l/J2mipKPISoaXjfVLNPt/kfS3krZNsz9MtrPo6a7HRXPC7z1vJUsMvyH7I70IeEGa9mWyevUvgVvIWk1jiogLgU+TJdVVZC21Z6fJ84APp8PgD47x8WPI6vrLgEvIartX5Yh9Y+B0shOAy8laiB9qMP+3yerW10TEg3XmeQtwa0RcmVpsyyNiOfCfwIsl7dbK7xoRjwAnkiWdUbIWf3WvnQ+SHXGsItvebevZkcpkJ5HtuB9O67m0Zrbladoysp3seyLijqrp3wI+SlbK2ZusFDOeOA4Gjk7rWU62A63s+N4C3J1KWu8Bjk3vvwj4b7LzEj8HzoiIa8dYxUqyBL9Y0mPA5WR/R59N0z9Ptg2uJNtxfIWsDv9Hsh3cP5GdmD8FOKzB3wY0/n95CXBjiuFSsnMlv2+yeQaaInwDFLNeIGk/4BsRsW2d6eeRnQB+xnUcZnm4hW9mVhJO+GZmJeGSjplZSbiFb2ZWEhO7HUC1bbbZJqZNm9btMMzM+sbNN9/8YLrArameSvjTpk1j4cKF3Q7DzKxvSGp0xf16XNIxMysJJ3wzs5JwwjczKwknfDOzknDCNzMrCSd8M7OS6KlumWZmZbFg0Sjzr1jCspWrmTI8xNzZ0zlyxkjzD24AJ3wzsw5bsGiU0y5ezOqnsuH5R1eu5rSLFwMUmvRd0jEz67D5VyxZm+wrVj+1hvlXLCl0vU74ZmYdtmzl6pbebxeXdMzMClKvTj9leIjRMZL7lOGhQuNxC9/MrACVOv3oytUE6+r0CxaNMnf2dIYmTVhv/qFJE5g7e3qhMTnhm5kVoFGd/sgZI8ybszsjw0MIGBkeYt6c3d1Lx8ysHzWr0x85Y6TwBF/LLXwzswLUq8cXXadvxAnfzKwA3arTN+KSjplZASrlmk5fTduIE76ZWUG6UadvpLCSjqTpkm6tejwq6f1Frc/MzBorrIUfEUuAPQEkTQBGgUuKWp+Z9ZZuDA5mjXWqpHMg8LuIyH2zXTPrX90aHMwa61TCPxr4dofWZWZd1uyio0HRb0cxhXfLlPQs4HDgwjrTT5C0UNLCFStWFB2OmXVAtwYH66RGQyf0qk608A8BbomIP4w1MSLOBs4GmDlzZnQgHjMrWLcGByvKWC35fjyK6cSFV8fgco5ZqfTiRUfjVa8lP9YODXr7KKbQFr6
kTYFXA+8ucj1m1lt68aKj8arXkp8gsSaeWZTo5aOYQhN+RDwBPKfIdZhZb+q1i47Gq16LfU0EQ5MmrLcz6PWjGI+lY2bWQL0We2VI404PcbwhPLSCmfW0bnd9nDt7+nrXFMC6lny/HcU44ZtZz+qFC7gG6XyEE76Z9axe6frYby35elzDN7OeVYYLuDrJCd/MelYv3jWqnznhm1nPGqQLuHqBa/hm1rMG6YRpL3DCN7OeNignTHuBSzpmZiXhhG9mVhIu6ZhZ13T7KtqyccI3s67ohatoy8YlHTPrikZX0Vox3MI3s65o9Spal382nBO+mXVFK7dBzFP+8Q6hOZd0zKwrWrmKtln5px9vKN4NTvhm1hVHzhjJfQORZuUfnw/IxyUdM+uavFfRNiv/eFTNfNzCN7Oe16z841E182ma8CXNkrRZen6spM9Lmlp8aGZmmWblH4+qmU+eks6ZwB6S9gBOAb4CfA14VZGBmZlVa1T+8aia+eRJ+E9HREg6AvhCRHxF0tuKDszMrBUeVbO5PAl/laTTgLcAr5Q0AZhUbFhmZtZueU7aHgU8Cbw9IpYDI8D8QqMyM7O2a5rwU5L/HrBxeutB4JIigzIzs/bL00vnXcBFwFnprRFgQZFBmZlZ++Up6bwPmAU8ChARvwWeW2RQZmbWfnkS/pMR8efKC0kTgSguJDMzK0KehH+dpA8BQ5JeDVwI/KDYsMzMrN3yJPxTgRXAYuDdwI+ADxcZlJmZtV/Dfvipz/35EXEs8OXOhGRmZkVo2MKPiDXAZEnP6lA8ZmZWkDxX2t4N/EzSpcDjlTcj4vNFBWVmZu2XJ+EvS4+NgC2KDcfMzIrSNOFHxMcBJG2RvYzHCo/KzMzaLs+VtrtJWgT8Cvi1pJsl7Zpn4ZKGJV0k6Q5Jt0t62YYGbGZm45OnpHM28IGI+AmApP3Ieuy8PMdnvwBcHhFvTCd+Nx1voGZmnbJg0ehAjq2fJ+FvVkn2ABFxbeUOWI1I2hLYFzgufe7PwJ8bfcbMrNsWLBrltIsXr70p+ujK1Zx28WKAvk/6eS68ukvSRyRNS48PA7/P8bkdyC7YOlfSIknnjLWjkHSCpIWSFq5YsaLF8M3M2mv+FUvWJvuK1U+tYf4VS7oUUfvkSfhvByYDF6fHNsDxOT43EdgLODMiZpB16Ty1dqaIODsiZkbEzMmTJ+cO3MysCMtWrm7p/X6Sp5fOw8BJ41j2UmBpRNyYXl/EGAnfrJ8Mam3X1pkyPMToGMl9yvBQF6Jprzy9dK6SNFz1emtJVzT7XLpxyn2SKreNPxD4zbgjNeuySm13dOVqgnW13QWLRrsdmrXR3NnTGZo0Yb33hiZNYO7s6XU+0T/ylHS2iYiVlRepxZ93PPx/AL4p6TZgT+DfWg/RrDcMcm3X1jlyxgjz5uzOyPAQAkaGh5g3Z/eBOJLL00vnL5K2j4h7ASRNJed4+BFxKzBzA+Iz6xmDWNt1iWpsR84YGcjtkCfh/wvwU0nXpdf7AicUF5JZbxq02u4gdz+0seW5ifnlZL1tvpsee0dE0xq+2aAZtNquS1TlUzfhS5oqaSuAiHiQrFvlq4G3erhkK6NBq+0OYonKGmtU0rkAeD3wiKQ9yW5tOA/YAzgDeGfx4Zn1lkGq7Q5aicqaa1TSGYqIZen5scBXI+JzZBdd7VN4ZGZWqEErUVlzjVr4qnp+AHAaQET8RdLYnzCzvlE5UumXXjruUbThGiX8ayRdANwPbA1cAyDpBXgQNLOB0C8lKvcoao9GJZ33k42dczfwioh4Kr3/fLKummZmHeEeRe1Rt4UfEQF8Z4z3FxUakZlZDfcoao88QyuYmXVVvZ5D7lHUGid8M+t57lHUHnmGVlhL0tbAdhFxW0HxmJk9Q7/1KOpVTRO+pGuBw9O8twIrJF0XER8oODYz65Je7ALZLz2Kelmeks5WEfEoMAc4NyL2Bg4qNiwz6xaP+z+48iT8ianv/ZuAywqOx8y6zF0gB1e
ehP8J4Argzoj4haQdgN8WG5aZdYu7QA6uPPe0vZBs4LTK67uANxQZlJl1jwdVG1x57mn7WUlbSpok6WpJD0o6thPBmVnnuQvk4MpT0jk4nbQ9DFgK7AzMLTQqM+uaQRv339bJ0w9/Uvp5KPDtiHjIo2WaDTZ3gRxMeRL+DyTdAawGTpQ0GfhTsWGZmVm75Tlpe6qkzwCPRsQaSU8ARxQfmll59OKFTjZ48py03RR4H3BmemsKMLPIoMzKxBc6WafkOWl7LtkNT16eXi8FPlVYRGYl4wudrFPyJPwdI+KzwFMAEbGa9W9/aGYbwBc6WafkSfh/ljQEBICkHYEnC43KrEQ81rt1Sp6E/1HgcmA7Sd8ErgZOKTQqsxLxhU7WKXl66Vwl6RbgpWSlnJMj4sHCIzMrCY/1bp2S9wYomwAPp/n/WhIR8T/FhWVWLu2+0MndPG0seW6A8hngKODXwF/S2wE44Zv1oEo3z0rPn0o3T8BJv+TytPCPBKZHhE/UmvWBRt08nfDLLc9J27tYN56OmfU4d/O0evK08J8AbpV0NVXdMSPipMKiMrNx83j2Vk+ehH9pephZH5g7e/p6NXxwN0/L5OmWeX4nAjFrhXuh1OdunlZP3YQv6YKIeJOkxaSrbKtFxIsLjcysDvdCac7j2dtYGrXwT04/DxvvwiXdDawC1gBPR4RH2bQN5l4oZuNTN+FHxP3p5z2Sng/sQ9bS/0VELG9hHfv7ylxrJ/dCMRufPOPhvxO4CZgDvBG4QdLbiw7MOmPBolFmnX4NLzz1h8w6/Zq+GIPdg42ZjU+eXjpzgRkR8UcASc8B/hf4ao7PBnClpADOioiza2eQdAJwAsD222+fN25rg36thRfVC8Ungm3Q5bnwailZHb5iFXBfzuXPioi9gEOA90nat3aGiDg7ImZGxMzJkyfnXKy1Q7/eeOPIGSPMm7M7I8NDCBgZHmLenN03KDn7rlNWBnla+KPAjZK+T9ZiPwK4SdIHACLi8/U+GBHL0s8HJF1Cdh7AY/D0iHbWwjvdOm53LxSfCLYyyJPwf5ceFd9PP7do9CFJmwEbRcSq9Pxg4BPjitIK0a4rMvu1NFRtvDs/l4Gsn+S58OrjAJK2zF7GqiYfqXgecImkynq+FRGXjzdQa7921cIHoXU8np3fIOzorFzy9NKZmS6+ug1YLOmXkvZu9rmIuCsi9kiPXSPi0+0I2NqnXbXwQegmOZ67TvXrORArrzwlna8CJ0bE9QCSXgGcC/hK2wHQjlr4IAzWNZ7hCAZhR2flkifhr6oke4CI+KmkvGUdK4FBGayr1Z3fIOzorFzydMu8SdJZkvaT9CpJZwDXStpL0l5FB2i9r4hukv3ANx+3fqOIZ4yLtv4M0k8aTI6IOKBdwcycOTMWLlzYrsWZFc69dKzbJN2cd5yyPL109t/wkMwGk0eltH6Sp4aPpNcCuwKbVN6LCPept4HgVrqVRdOEL+lLwKbA/sA5ZAOo3VRwXGYd4b70ViZ5Ttq+PCLeCjycLsJ6GbBdsWGZdYb70luZ5En4lX5nT0iaAjwFvLC4kMw6x33prUzyJPzLJA0D84FbgLuB7xQZlFmneGx9K5OmCT8iPhkRKyPie8BUYJeI+EjxoZkVz33prUwa3cR8ToNpRMTFxYRk1jnjGVLBrF816qXzuprnP6h6HYATvg0E96W3smh0E/PjK88lLap+beXmfutm/SnXhVdkLXoz91s362N5eumYreV+62b9q9FJ2x+wrmW/g6RLq6dHxOFFBma9yf3WzfpXo5LOv1c9/1zRgVh/8BjwZv2r0Unb6zoZiPWHQbnZiVkZ5T1pawa437pZP3PCt5bl7bfu7ptmvcUJ3wrh7ptmvSdvL51ncC8da6RZ9023/M06L28vHbOW1OumWWnpu+Vv1nnupWOFqNd9c4JUt+XvhG9WrKZX2kp6kaSLJP1G0l2VRyeCs/5Vb9jhNTF2ldAXbpkVL8/QCucCZwJPk93X9mv
A14sMyvrfkTNGmDdnd0aGhxAwMjy09vVYfOGWWfHy9NIZioirJSki7gE+Jul64KMFx2Z9rl73zaIu3HI3ULPG8iT8P0naCPitpL8HRoHnFhuWDaqiLtxyN1Cz5hR1aqprZ5BeAtwODAOfBLYCPhsRN7Q7mJkzZ8bChQvbvVgrgVmnXzPmSeKR4SF+duoBXYjIrDMk3RwRM/PM27SFHxG/SE8fA3wTFOtJHsXTrLmmCV/STxjjAqyIcLPJeoZH8TRrLk8N/4NVzzcB3kDWY8esZ3gUT7Pm8pR0bq5562eSfFGW9RSP4mnWXJ6SzrOrXm4E7A08v7CIzMYp7yieZmWVp6RzM1kNX2SlnN8D7ygyKDMza788Cf+vIuJP1W9I2jjvCiRNABYCoxFxWIvxmZlZm+QZWuF/x3jv5y2s42SyfvxmZtZFjcbDfz4wAgxJmkFW0gHYEtg0z8IlbQu8Fvg08IENC9XaxUMQmJVTo5LObOA4YFvgc6xL+I8CH8q5/P8ATgG2qDeDpBOAEwC23377nIu18SrTEATesZmtr25JJyLOj4j9geMi4oCI2D89joiIi5stWNJhwANjdOusXc/ZETEzImZOnjy59d/AWtLsTlSDorJjG125mmDdjm3BotFuh2bWNXlq+HtLGq68kLS1pE/l+Nws4HBJdwPfAQ6Q9I3xhWntUpYhCMqyYzNrRZ6Ef0hErKy8iIiHgUObfSgiTouIbSNiGnA0cE1EHDvuSK0t6g01MGhDEJRlx2bWijwJf0J1N0xJQ0DubpnWW+rdiWrQhiAoy47NrBV5Ev43gKslvUPS24GryO56lVtEXOs++L2h3p2oBu1kZll2bGataDoePoCk1wAHkfXUuTIirigiGI+Hb+3kXjpWBq2Mh58r4dcsfBbwdxHxvvEE14gTfnk5OZuNT1tvgJIWuCdwDHAU2Vg6TbtlmuVVpmsDzLqpbg1f0s6S/lXS7cB/AUvJjgj2j4gvdixCG3juQmnWGY1a+HcA1wOvi4g7AST9Y0eislJxF0qzzmiU8N9A1n/+J5IuJ7t4Sg3mty7r1zq4b09o1hmNhla4JCKOAnYBrgX+EXiepDMlHdyh+Cynfh5KwF0ozTqjaT/8iHg8Ir6Z+tFvC9wKnFp4ZNaSXq+DL1g0yqzTr+GFp/6QWadfs96OqCzXBph1W65eOhUR8RBwVnpYD+nlOnieXji+PaFZ8fJcaWt9oJeHEuj1ow+zsnDCHxC9XAfv5aMPszJxwh8QvVwH7+WjD7MyaamGb72tV+vgc2dPX6+GD71z9GFWJk74VrjKTqgfrxEwGyRO+NYRvXr0YVYmruGbmZWEE76ZWUk44ZuZlYQTvplZSTjhm5mVhBO+mVlJOOGbmZWEE76ZWUk44ZuZlYQTvplZSTjhm5mVhBO+mVlJOOGbmZWEE76ZWUk44ZuZlYQTvplZSTjhm5mVhBO+mVlJOOGbmZWEE76ZWUk44ZuZlURhCV/SJpJukvRLSb+W9PGi1mVmZs1NLHDZTwIHRMRjkiYBP5X044i4ocB1mplZHYUl/IgI4LH0clJ6RFHrMzOzxgqt4UuaIOlW4AHgqoi4cYx5TpC0UNLCFStWFBmOmVmpFVnSISLWAHtKGgYukbRbRPyqZp6zgbMBZs6c2fIRwIJFo8y/YgnLVq5myvAQc2dP58gZI22J38xskHSkl05ErASuBV7TzuUuWDTKaRcvZnTlagIYXbma0y5ezIJFo+1cjZnZQCiyl87k1LJH0hBwEHBHO9cx/4olrH5qzXrvrX5qDfOvWNLO1ZiZDYQiSzovAM6XNIFsx3JBRFzWzhUsW7m6pffNzMqsyF46twEzilo+wJThIUbHSO5ThoeKXK2ZWV/q6ytt586eztCkCeu9NzRpAnNnT+9SRGZmvavQXjpFq/TGcS8dM7Pm+jrhQ5b0neDNzJrr65KOmZnl54RvZlYSTvhmZiXhhG9mVhJO+GZmJeGEb2ZWEsqGre8NklYA97R
5sdsAD7Z5me3Sq7E5rtY4rtY4rtY0i2tqREzOs6CeSvhFkLQwImZ2O46x9Gpsjqs1jqs1jqs17YzLJR0zs5JwwjczK4kyJPyzux1AA70am+NqjeNqjeNqTdviGvgavpmZZcrQwjczM5zwzcxKYyASvqRNJN0k6ZeSfi3p42PMs7Gk70q6U9KNkqb1SFzHSVoh6db0eGfRcVWte4KkRZKecevJbmyvnHF1c3vdLWlxWu/CMaZL0n+mbXabpL16JK79JD1Stc3+tUNxDUu6SNIdkm6X9LKa6d3aXs3i6vj2kjS9an23SnpU0vtr5tng7dX34+EnTwIHRMRjkiYBP5X044i4oWqedwAPR8ROko4GPgMc1QNxAXw3Iv6+4FjGcjJwO7DlGNO6sb3yxAXd214A+0dEvYtgDgFelB5/A5yZfnY7LoDrI+KwDsVS8QXg8oh4o6RnAZvWTO/W9moWF3R4e0XEEmBPyBo8wChwSc1sG7y9BqKFH5nH0stJ6VF7NvoI4Pz0/CLgQEnqgbi6QtK2wGuBc+rM0vHtlTOuXnYE8LX0vd8ADEt6QbeD6gZJWwL7Al8BiIg/R8TKmtk6vr1yxtVtBwK/i4jaUQc2eHsNRMKHtWWAW4EHgKsi4saaWUaA+wAi4mngEeA5PRAXwBvSIdpFkrYrOqbkP4BTgL/Umd6V7ZUjLujO9oJsZ32lpJslnTDG9LXbLFma3ut2XAAvS6XFH0vatQMx7QCsAM5N5blzJG1WM083tleeuKDz26va0cC3x3h/g7fXwCT8iFgTEXsC2wL7SNqtZpaxWqeFt7ZzxPUDYFpEvBj4b9a1qgsj6TDggYi4udFsY7xX6PbKGVfHt1eVWRGxF9mh9fsk7VszvSt/YzSP6xay8Vb2AL4ILOhATBOBvYAzI2IG8Dhwas083dheeeLqxvYCIJWYDgcuHGvyGO+1tL0GJuFXpMOza4HX1ExaCmwHIGkisBXwULfjiog/RsST6eWXgb07EM4s4HBJdwPfAQ6Q9I2aebqxvZrG1aXtVVn3svTzAbL66j41s6zdZsm2wLJuxxURj1ZKixHxI2CSpG0KDmspsLTqiPYiskRbO0+nt1fTuLq0vSoOAW6JiD/8YqKGAAADzklEQVSMMW2Dt9dAJHxJkyUNp+dDwEHAHTWzXQq8LT1/I3BNFHzVWZ64ampwh5OdrCxURJwWEdtGxDSyw8drIuLYmtk6vr3yxNWN7ZXWu5mkLSrPgYOBX9XMdinw1tSb4qXAIxFxf7fjkvT8yvkXSfuQ/d//sci4ImI5cJ+k6emtA4Hf1MzW8e2VJ65ubK8qxzB2OQfasL0GpZfOC4Dz09ntjYALIuIySZ8AFkbEpWQnab4u6U6ylurRPRLXSZIOB55OcR3XgbjG1APbK09c3dpezwMuSXlgIvCtiLhc0nsAIuJLwI+AQ4E7gSeA43skrjcC75X0NLAaOLronXfyD8A3U5niLuD4HtheeeLqyvaStCnwauDdVe+1dXt5aAUzs5IYiJKOmZk154RvZlYSTvhmZiXhhG9mVhJO+GZmJeGEb31L0hplIwv+StKFqVvbeJe1n9LonJIOl1R79WX1vMOSTqx6PUXSReNdt1mnOOFbP1sdEXtGxG7An4H3VE9MF6i0/DceEZdGxOkNZhkGTqyaf1lEvLHV9Zh1mhO+DYrrgZ0kTVM2xvkZZGOibCfpYEk/l3RLOhLYHEDSa5SNif5TYE5lQcrG3P+v9Px5ki5JA2n9UtLLgdOBHdPRxfy0zl+l+TeRdK6y8ekXSdq/apkXS7pc0m8lfTa9P0HSeekoZbGkf+zkRrNyGZQrba3E0lg/hwCXp7emA8dHxIlpDJQPAwdFxOOS/hn4QEq4XwYOILty8bt1Fv+fwHUR8fp0xfTmZINt7ZYGxUPr3xzmfQARsbukXchGsdw5TdsTmEF2n4Qlkr4IPBcYSUcpVIbiMCuCW/jWz4aUDT29ELiXNMY5cE/VTWZeCvw18LM
079uAqcAuwO8j4rfpsvnaweMqDiC70URl5NNHmsT0CuDraf47gHuASsK/OiIeiYg/kY3fMpXs0v4dJH1R0muAR/P/+matcQvf+tnqSiu7Io0p83j1W2T3ITimZr49KWYo3kY3iXmy6vkaYGJEPCxpD2A22dHBm4C3FxCXmVv4NvBuAGZJ2gmyAapSieUO4IWSdkzzHVPn81cD702fnaDsjkmrgC3qzP8/wJvT/DsD2wNL6gWXSk4bRcT3gI/wzCGEzdrGCd8GWkSsIBtR89uSbiPbAeySyionAD9MJ21rbydXcTKwv6TFwM3ArhHxR7IS0a8kza+Z/wxgQpr/u8BxVeP3j2UEuDaVm84DThvP72mWh0fLNDMrCbfwzcxKwgnfzKwknPDNzErCCd/MrCSc8M3MSsIJ38ysJJzwzcxK4v8DwCjXRIAf1ZkAAAAASUVORK5CYII=\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.scatter(lin_reg_model.predict(test_x), test_y)\n", + "plt.title('Predictions v Actual Happiness Scores')\n", + "plt.xlabel('Predictions')\n", + "plt.ylabel('Actual Happiness Scores');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exercise\n", + "* Create Simple and Multiple Linear Regression Models trained on stratified training sets\n", + "* Evaluate the models and compare the results to those presented in this notebook\n", + "* Are the results better, worse or the same? Why?" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- GitLab