diff --git a/Amrin_ML.ipynb b/Amrin_ML.ipynb new file mode 100644 index 000000000..6e6d2a6ea --- /dev/null +++ b/Amrin_ML.ipynb @@ -0,0 +1,983 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regression Intuitions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "***Simple Linear Regression:*** Simple linear regression fits a straight line so that, given an input value x, the model predicts an output value y. Both x and y are continuous numerical variables. It is one of the simplest and best-known algorithms used in machine learning.\n", + "\n", + "**y = mx + c**, where m is the slope and c is the intercept.\n", + "\n", + "***Multiple Linear Regression:*** Multiple linear regression extends this to more than one feature variable (x1, x2, …, xp), which are used together to predict the target value y.\n", + "\n", + "$y_i = \\beta_0 + \\beta_1 x_{i1} + \\beta_2 x_{i2} + \\dots + \\beta_p x_{ip} + \\epsilon_i$ for observations $i = 1, \\dots, n$, where $y_i$ is the target, $x_{ij}$ are the feature values, $\\beta_0$ is the intercept, $\\beta_1, \\dots, \\beta_p$ are the coefficients and $\\epsilon_i$ is the error term.\n", + "\n", + "*This notebook uses a multiple linear regression model.*\n", + "\n", + "​" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Importing the libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Importing the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
R&D SpendAdministrationMarketing SpendStateProfit
0165349.20136897.80471784.10New York192261.83
1162597.70151377.59443898.53California191792.06
2153441.51101145.55407934.54Florida191050.39
3144372.41118671.85383199.62New York182901.99
4142107.3491391.77366168.42Florida166187.94
\n", + "
" + ], + "text/plain": [ + " R&D Spend Administration Marketing Spend State Profit\n", + "0 165349.20 136897.80 471784.10 New York 192261.83\n", + "1 162597.70 151377.59 443898.53 California 191792.06\n", + "2 153441.51 101145.55 407934.54 Florida 191050.39\n", + "3 144372.41 118671.85 383199.62 New York 182901.99\n", + "4 142107.34 91391.77 366168.42 Florida 166187.94" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('50_Startups.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "50" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(50, 5)" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plotting the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(df['Marketing Spend'], df['Profit'])\n", + "#plt.plot(X_train, regressor.predict(X_train), color = 'blue')\n", + "plt.title('Marketing Spend vs Profit')\n", + "plt.xlabel('Marketing Spend')\n", + "plt.ylabel('Profit')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(df['R&D Spend'], df['Profit'])\n", + "#plt.plot(X_train, regressor.predict(X_train), color = 'blue')\n", + "plt.title('R&D Spend vs Profit')\n", + "plt.xlabel('R&D Spend')\n", + "plt.ylabel('Profit')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEWCAYAAABbgYH9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAqeUlEQVR4nO3de5xdZX3v8c/XJMZRhCQQMUyAxHKpKJo0U0oPhYNiSbQ9EjhYQlVipY1S6l2OifYliPUYpMqR4xGLhXJRISli4CgpRiPSWm4TAoQAMRGCTBIhkICgkZOE3/ljPZusGfbszGT22nutme/79dqvWfNbl3nWnpn1289lPUsRgZmZWbO9rN0FMDOz4ckJxszMCuEEY2ZmhXCCMTOzQjjBmJlZIZxgzMysEE4wVmmSrpD0D3u476cl/XOzty2CpKWS5rbr57eSpH+Q9KSkX0k6SNJzkka1u1w2ePJ9MFZGkm4B3gy8NiKeb7DdFUBPRPx9i4rWUCr3tyJij5ORpPOAQyLiPc0qV5EkHQ8sB34LBLARWBgR/7IHxzoQ+DlwcEQ8UWf9LQzx/bXWcQ3GSkfSFOBYsovVO9tbmuaSNLrdZSjIxojYC9gb+BTwTUlH9N1oAOd/MPBUveRi1eMEY2V0BnA7cAXQq1lI0nRJd0t6VtIi4BW5dcdL6pH0PyQ9IWmTpNmS3iHp55K2SPp0bvvzJH0rLU+RFJLmSvplaqL5TD/bvkLStyQ9JelpSXdJ2l/SF8gS49dSs87X0vYh6WxJa4G1KfZVSY9J+rWkFZKOTfFZwKeB09Ix7k3xWyT9dVp+maS/l/RoOs+rJO0zkPPo814enZqhRuViJ0u6Ly0fJak7lfFxSV/Z3S8uMkuArcARkt4n6WeSLpK0BThP0j6pzJvTOfx9Oqe3AcuAA9K5X5E7n9H9vb9WYhHhl1+legHrgL8FZgDbgf1T/OXAo8DHgDHAqWn9P6T1xwM7gM+m9X8DbAa+A7waeAPwO+B1afvzyJpbAKaQ1Zi+CXSQNc89D7y+zrYfAP4v8EpgVCrn3mndLcBf9zmfILtwTgA6Uuw9wL7AaOATwK+AV/T9WbljvHhc4P3pPXodsBdwPXD1QM6jznv9C+BPc9//KzA/Ld8GvDct7wUc3c8xjidrpoTsQ+vJ6fdyOPC+9Dv5UDrXDuAq4Ib0O5lC1iR2Zt9j9Tmf0f29v36V9+UajJWKpD8hayZZHBEryC6Af5lWH02WOP5XRGyPiOuAu/ocYjvwhYjYDlwL7Ad8NSKejYjVwGrgTQ2K8LmI2BYR9wL3kl2g+9pOlhwOiYidEbEiIn69m1P7YkRsiYhtABHxrYh4KiJ2RMSXgbFkF+SBeDfwlYh4OCKeAxYAc/o0Pw3kPACuAU4HkPRq4B0pVjvPQyTtFxHPRcTtDcp0gKSngSeBc8kS05q0bmNE/O+I2AH8P+A0YEH6nawHvgy8d4DnbhXiBGNlMxf4YUQ8mb7/DruayQ4ANkREfmTKo332fyoidqblbenr
47n128g+jffnV7nl3/az7dXAzcC1kjZK+pKkMQ2OCfBY/htJn5D0oKRn0oV5H7JkOBAH0Pu8HyWrHeyfiw3kPCB7f0+RNBY4Bbg7ImrHPhM4DHgoNQP+eYMybYyIcRExISKmRcS1uXX5c9+PXTXRfPk7GxzbKmq4djhaBUnqAP4CGCWpdoEcC4yT9GZgE9ApSbkkcxBZLadlUu3oc8Dn0oCEm4A1wGVkzTl1d6stpP6WTwEnAKsj4gVJWwH13bYfG8lqeTUHkTVDPQ5MHuS5PCDpUeDtZDXF7+TWrQVOl/QysuRznaR9I+I3g/kZ9D6fJ8lqRgcDD+TKv2EPjmUl5xqMlclsYCdwBDAtvV4P/DtZx/9tZBfSD6dO31OAo1pdSElvkXRk6hz/NdkFs1Zrepysb6SRV5Odx2ZgtKTPko2+qnkcmJIu7PVcA3xM0lRJewH/E1iUmqD2xHeADwPHkfXBACDpPZImRsQLwNMpvPOluw9cql0uBr4g6dWSDgY+DnxrgIcYyPtrJeEEY2UyF/iXiPhlRPyq9gK+Rtbv8ALZJ+n3kY1SOo2sg7vVXgtcR5ZcHgR+yq4L5FeBUyVtlXRxP/vfDCwl69x+lGzgQb4ZqXaRf0rS3XX2v5ysme5W4JG0/4f2+GyyhHU8sDzXNAkwC1gt6Tmy85oTEb8bws+p+RDwG+Bh4D/IEtzlA9x3IO+vlYRvtDQzs0K4BmNmZoVwgjEzs0IUlmAkHSjpJ2ko5mpJH0nxCZKWSVqbvo7P7bNA0jpJayTNzMVnSFqV1l0sSSk+VtKiFL8jjeip7TM3/Yy1GiGTBJqZlUmRNZgdwCci4vVkN8idrWxuovnAjyPiUODH6XvSujlkd1vPAr6em8LiEmAecGh6zUrxM4GtEXEIcBFwQTrWBLKbvf6IbJTRuflEZmZmxSvsPpiI2ER23wIR8aykB8lupjqJbMQKwJVkUz98KsWvjWzm3EckrQOOkrSebBqO2wAkXUU2nHVp2ue8dKzryOYoEjATWBYRW9I+y8iSUu0O5ZfYb7/9YsqUKUM/cTOzEWTFihVPRsTEeutacqNlarqaDtxBNq9ULfFskvSatFkn2QSHNT0ptj0t943X9nksHWuHpGfIpvB4MV5nn7qmTJlCd3f3oM/NzGwkSzfq1lV4J3+6Eey7wEd3M1+T6sSiQXxP98mXbV6aLbZ78+bNDYpmZmaDVWiCSfMzfRf4dkTUboh7XNKktH4SUHvuQw9wYG73yWRTYvTQe/qLWrzXPmmiv32ALQ2O1UtEXBoRXRHRNXFi3RqemZntoSJHkYlsbqYHIyL/HIkb2TV54Vyyabtr8TlpZNhUss78O1Nz2rPKnl0hsilDbqhzrFPJ7kQOsjulT5Q0PnXun5hiZmbWIkX2wRxDNgX3Kkn3pNingYXAYklnAr8E3gUQEaslLSabAG8HcHZuVtyzyB4+1UHWub80xS8Drk4DAraQjUIjIrZI+jy7pnI/v9bhb2ZmreGpYpKurq5wJ7+Z2eBIWhERXfXWebp+M2u6JSs3cOHNa9j49DYOGNfBOTMPZ/Z0P/JlpHGCMbOmWrJyAwuuX8W27VkL94ant7Hg+lUALU8yTnTt5QRjlvhi1BwX3rzmxeRSs237Ti68eU1L388yJbqRypNdmrHrYrTh6W0Euy5GS1YO9EGLVrPx6W2DihelUaKz1nCCMcMXo2Y6YFzHoOJFKUuiG8mcYMzwxaiZzpl5OB1jRvWKdYwZxTkzD29pOcqS6EYyJxgzfDFqptnTO/niKUfSOa4DAZ3jOvjiKUe2vN+jLIluJHMnvxnZxSjfIQy+GA3F7Omdbe9Ir/18D9xoHycYM3wxGq7KkOhGMicYs8QXI7Pmch+MmZkVwjUYswrxzaBWJU4wZhXhO9OtatxEZlYRvhnUqsYJxqwifDOoVY0TjFlF+GZQqxonGLOK8J3pVjXu5DerCN8MalXjBGNWIb4Z1KqksCYySZdLekLS/bnYIkn3
pNd6Sfek+BRJ23LrvpHbZ4akVZLWSbpYklJ8bDreOkl3SJqS22eupLXpNbeoczQzs/4VWYO5AvgacFUtEBGn1ZYlfRl4Jrf9LyJiWp3jXALMA24HbgJmAUuBM4GtEXGIpDnABcBpkiYA5wJdQAArJN0YEVubd2pmZrY7hdVgIuJWYEu9dakW8hfANY2OIWkSsHdE3BYRQZasZqfVJwFXpuXrgBPScWcCyyJiS0oqy8iSkpmZtVC7RpEdCzweEWtzsamSVkr6qaRjU6wT6Mlt05NitXWPAUTEDrLa0L75eJ19epE0T1K3pO7NmzcP9ZzMzCynXQnmdHrXXjYBB0XEdODjwHck7Q2ozr6Rvva3rtE+vYMRl0ZEV0R0TZw4ccCFNzOz3Wt5gpE0GjgFWFSLRcTzEfFUWl4B/AI4jKz2MTm3+2RgY1ruAQ7MHXMfsia5F+N19jEzsxZpRw3mbcBDEfFi05ekiZJGpeXXAYcCD0fEJuBZSUen/pUzgBvSbjcCtRFipwLLUz/NzcCJksZLGg+cmGJmZtZChY0ik3QNcDywn6Qe4NyIuAyYw0s7948Dzpe0A9gJfDAiagMEziIbkdZBNnpsaYpfBlwtaR1ZzWUOQERskfR54K603fm5Y5mZWYso+9BvXV1d0d3d3e5imJlViqQVEdFVb53nIjMzs0J4qhirHD/V0awanGCsUvxUR7PqcBOZVYqf6mhWHU4wVil+qqNZdTjBWKX4qY5m1eEEY5XipzqaVYc7+a1S/FRHs+pwgrHK8VMdzarBTWRmZlYIJxgzMyuEE4yZmRXCfTA2onnamWry760anGBsxPK0M9WRTyjjXjmG5363g+0vZDPB+/dWXm4isxHL085UQ+2DwIantxHA1t9ufzG51Pj3Vk5OMDZiedqZaqj3QaAe/97KxwnGRixPO1MNA00c/r2VjxOMjViedqYaBpI4RNYXc8zC5SxZuaH4QtmAOMHYiDV7eidfPOVIOsd1IKBzXAdfPOVIdxSXTL0PAmNGiXEdY4AsudR6ZGod/k4y5VBYgpF0uaQnJN2fi50naYOke9LrHbl1CyStk7RG0sxcfIakVWndxZKU4mMlLUrxOyRNye0zV9La9Jpb1Dla9c2e3snP5r+VRxb+GT+b/1YnlxKq90HgwlPfzD3nnkjnuA6iz/bu8C+PIocpXwF8DbiqT/yiiPjHfEDSEcAc4A3AAcCPJB0WETuBS4B5wO3ATcAsYClwJrA1Ig6RNAe4ADhN0gTgXKCL7IPNCkk3RsTWYk7TrDffo9F8/c0/54Ea5VZYDSYibgW2DHDzk4BrI+L5iHgEWAccJWkSsHdE3BYRQZasZuf2uTItXweckGo3M4FlEbElJZVlZEnJrHB9h9S6yaZYHqhRbu3og/k7SfelJrTxKdYJPJbbpifFOtNy33ivfSJiB/AMsG+DY72EpHmSuiV1b968eWhnZYbvrWk1D9Qot1YnmEuA3wOmAZuAL6e46mwbDeJ7uk/vYMSlEdEVEV0TJ05sUGyzgXGTTWt5oEa5tXSqmIh4vLYs6ZvA99O3PcCBuU0nAxtTfHKdeH6fHkmjgX3ImuR6gOP77HNLs87BrJEDxnWwoU4yeZnE1Pk/cJ9MAfx8oPJqaQ0m9anUnAzURpjdCMxJI8OmAocCd0bEJuBZSUen/pUzgBty+9RGiJ0KLE/9NDcDJ0oan5rgTkwxs8LVa7IB2BnhPhkbcQqrwUi6hqwmsZ+kHrKRXcdLmkbWZLUe+ABARKyWtBh4ANgBnJ1GkAGcRTYirYNs9NjSFL8MuFrSOrKay5x0rC2SPg/clbY7PyIGOtjAKqKsI7X6PtL5ZRI7o/68WWUor1mRFFG3e2LE6erqiu7u7nYXwwag7yzIkHXslrHtfer8H9TtABTwyMI/a3VxzJpO0oqI6Kq3znfyW+VUaaSWh9HaSOYEY5VTpZFaZR9Gu2TlBo5ZuJyp83/gebys6fzAMauc/kZqlbFW0LdPpkz9
RX7gmhXNCcYq55yZh9ftgylLraCvsg6jbdTUWMbyWvU4wVjllLlWUCVVamq0anKCsUoqa62gSqrU1GjV5E5+sxGq7AMQrPpcgzEbodzUaEVzgjEbwdzUaEVygjGzpijr9D3WPk4wZjZkvqfG6nGCMbMhGy731LgW1lxOMGY2ZMPhnhrXwprPw5TNbMiGw6SeVZpEtSqcYMyGmXZMYDkc7qkZDrWwsnETmdkw0q5mnuFwT41nNmg+JxizYaSdne1Vv6emapOoVoETjNkwsWTlhrqfwMHNPAMxHGphZeMEYzYM1JrG+uNmnoGpei2sbArr5Jd0uaQnJN2fi10o6SFJ90n6nqRxKT5F0jZJ96TXN3L7zJC0StI6SRdLUoqPlbQoxe+QNCW3z1xJa9NrblHnaFYW9ZrGaqrezOOnblZXkaPIrgBm9YktA94YEW8Cfg4syK37RURMS68P5uKXAPOAQ9Ordswzga0RcQhwEXABgKQJwLnAHwFHAedKGt/MEzNrpmZcQBs1gX3xlCMr+6m8VjPb8PQ2gl2DFpxkqqGwBBMRtwJb+sR+GBE70re3A5MbHUPSJGDviLgtIgK4CpidVp8EXJmWrwNOSLWbmcCyiNgSEVvJklrfRGdWCs26gPbXBNY5rqOyyQV8b0rVtfM+mPcDS3PfT5W0UtJPJR2bYp1AT26bnhSrrXsMICWtZ4B98/E6+/QiaZ6kbkndmzdvHur5mA1asy6gw+E+lHqKuDfFTW6t05ZOfkmfAXYA306hTcBBEfGUpBnAEklvAFRn96gdpp91jfbpHYy4FLgUoKurq+42ZkVq1gV0uI6Aava9KZ4OprVanmBSp/ufAyekZi8i4nng+bS8QtIvgMPIah/5ZrTJwMa03AMcCPRIGg3sQ9Yk1wMc32efWwo6HbMhaeYFdDiOgGr2vSnDZVLOqmhpE5mkWcCngHdGxG9z8YmSRqXl15F15j8cEZuAZyUdnfpXzgBuSLvdCNRGiJ0KLE8J62bgREnjU+f+iSlmVjrDtWmrWWZP7+SLpxxJ57gORNanNJRBC54OprUKq8FIuoasJrGfpB6ykV0LgLHAsjTa+PY0Yuw44HxJO4CdwAcjojZA4CyyEWkdZH02tX6by4CrJa0jq7nMAYiILZI+D9yVtjs/dyyzUpk9vZPuR7dwzR2PsTOCURL/fcbwq4kMRTNrZp4OprWUWqlGvK6uruju7m53MWyE6dsnAFkNpspDi8vM73fzSVoREV311nk2ZbM28jDc1mp2k5s15qlizNrIfQKtNxwHQ5SVazBmbTQcHtRl1h8nGLM28igyG87cRGbWRsP1BkkzcIIxazv3Cdhw5QRjZiPKkpUbXGNsEScYs2HGF9D+eS6y1hpQJ7+kHw8kZmbt5eenNOb7jlqrYQ1G0iuAV5JN9zKeXTMV7w0cUHDZzAo1lE/6Za0leDLHxnzfUWvtronsA8BHyZLJ3bn4r4H/U1CZzAo3lKaSMjez+ALamOcia62GTWQR8dWImAp8MiKm5l5vjoivtaiMZk03lKaSMjezlP3GzXY/7Mv3HbXW7prI3hoRy4ENkk7puz4iri+sZGYFGson/TLXEpr9/JRmKkPNz/cdtdbumsiOA5YD/63OugCcYKyShtJUUuZmlj25gLaqP2mw/UNFlcv3HbXO7hLM1vT1soj4j6ILY9YqQ/mkX+ZaAgzuAtrKWsVgan5lqO3Y0O1umPJfpa8XF10Qs1YayrTtw2nK91b2Jw2mf6jM/VzDSdF9YrurwTwoaT0wUdJ9ubiAiIg3NbU0Zi00lKaS4dLM0sr+pMHU/MrczzVctKKW2DDBRMTpkl5L9kz7dzblJ5pZabSyP2kw/UNl7ucaLlpxz9Rup4qJiF8Bb5b0cuCwFF4TEdubUgIza5tW9ycNtOZXRLnKenNsu7SiljjQqWL+K7CW7ObKrwM/l3Tcbva5XNITku7PxSZIWiZpbfo6PrdugaR1ktZImpmLz5C0Kq27WJJS
fKykRSl+h6QpuX3mpp+xVtLcAb4XZqXQyntFytqf1OxyeQqdl2rFPVOKiN1vJK0A/jIi1qTvDwOuiYgZDfY5DngOuCoi3phiXwK2RMRCSfOB8RHxKUlHANcAR5HNGvAj4LCI2CnpTuAjwO3ATcDFEbFU0t8Cb4qID0qaA5wcEadJmgB0A11kQ6lXADMiYisNdHV1RXd3927fC7Mi9W0Xh+yTexku+lV2zMLldZvcOsd18LP5b21DidqvWX9rklZERFe9dQN9ouWYWnIBiIifA2Ma7RARtwJb+oRPAq5My1cCs3PxayPi+Yh4BFgHHCVpErB3RNwWWSa8qs8+tWNdB5yQajczgWURsSUllWXArAGep1lbefRUMTxo4KVaUXsd6HT9KyRdBlydvn83Wc1gsPaPiE0AEbFJ0mtSvJOshlLTk2Lb03LfeG2fx9Kxdkh6Btg3H6+zTy+S5gHzAA466KA9OB2z5mr3hXC49lN40EB9RY+GHGiC+SBwNvBhsiHKt5L1xTSL6sSiQXxP9+kdjLgUuBSyJrLdF9OapSoXslaXs50XwuF8c+Nbfn8i37r9l3Xj9VTl77PsdttEJullwIqI+EpEnBIRJ0fERRHx/B78vMdTsxfp6xMp3gMcmNtuMrAxxSfXiffaR9JoYB+yJrn+jmUlUZUO13aUs52TMQ7n5rmfPLR5wPGq/H1WwW4TTES8ANwrqRltSDcCtVFdc4EbcvE5aWTYVOBQ4M7UnPaspKNT/8oZffapHetUYHnqp7kZOFHS+DRK7cQUs5KoyoWsHeVs56iudjfPFWkw51aVv88qGGgT2SRgdRrR9ZtaMCL6vflS0jXA8WQPK+sBzgUWAoslnQn8EnhXOs5qSYuBB4AdwNkRUfsNnwVcAXQAS9ML4DLgaknryGouc9Kxtkj6PHBX2u78iOg72MDaqCoXsnaVs12zBAznforBnFtV/j6rYKAJ5nODPXBEnN7PqhP62f4LwBfqxLuBN9aJ/46UoOqsuxy4fMCFtZaqyoWsKuVslrJP4jkUgzm3kfZ7L1LDJjJJr5D0UbIL+e8DP4uIn9ZerSigDT9VeehTVcrZLGW96bIZBnNuI+33XqSGN1pKWkQ2VPjfgbcDj0bER1pUtpbyjZatVZVROlUp554Yzuc2VH5vBq7RjZa7SzCrIuLItDyarOP9D4opZnvtaYLxH6JVkWcMsGYZyp38L05oGRE7mlqqYcDDGa2qPFLKWmF3CebNkn6dXs8Cb6otS/p1KwpYZv4ntarySClrhd09D2ZUo/Ujnf9Jrao8UspaYaCTXVodrZju2qwIHillreAEMwT+J7WqGs5Dkq08BnqjpdUxmEfAmpVNu2YMsJHDCWaI/E9qZlafm8jMzKwQTjBmZlYIN5ENE2WeUaDMZTOz4jjBDANlfhJhmctmZsVyE9kwUOYZBcpcNjMrlmsww0CZZxQoc9mqyM2NViWuwQwDZZ5RoL8yvExi6vwfcMzC5Z4cdIA8uapVjRPMMFDmGQXqlQ1gZ4QvkoPk5karGieYYaDM0370Ldso6SXb+CI5MG5utKppeR+MpMOBRbnQ64DPAuOAvwE2p/inI+KmtM8C4ExgJ/DhiLg5xWcAVwAdwE3ARyIiJI0FrgJmAE8Bp0XE+kJPrM3KPKNAvmxT5/+g7ja+SO6eZ0C2qml5DSYi1kTEtIiYRpYAfgt8L62+qLYul1yOAOYAbwBmAV+XVGtzuQSYBxyaXrNS/Exga0QcAlwEXFD8mdlAlLm/qOzK3BRqVk+7m8hOAH4REY822OYk4NqIeD4iHgHWAUdJmgTsHRG3Rfbc56uA2bl9rkzL1wEnSHXaZqzlfJHcc2VuCjWrp93DlOcA1+S+/ztJZwDdwCciYivQCdye26Ynxban5b5x0tfHIHvUs6RngH2BJ/M/XNI8shoQBx10UJNOyRrxDNRDU+amULO+2pZgJL0ceCewIIUuAT4PRPr6ZeD9QL2aRzSIs5t1uwIRlwKX
AnR1db1kvRWjlRdJ3zdi1j7tbCJ7O3B3RDwOEBGPR8TOiHgB+CZwVNquBzgwt99kYGOKT64T77WPpNHAPsCWgs7DSsr3jZi1VzsTzOnkmsdSn0rNycD9aflGYI6ksZKmknXm3xkRm4BnJR2d+lfOAG7I7TM3LZ8KLE/9NDaC+L6R3VuycgPHLFzum16tEG1pIpP0SuBPgQ/kwl+SNI2sKWt9bV1ErJa0GHgA2AGcHRG1q8ZZ7BqmvDS9AC4Drpa0jqzmMqfA07GS8n0jjXkiUitaWxJMRPyWrNM9H3tvg+2/AHyhTrwbeGOd+O+Adw29pFZlvm+ksUY1PCcYa4Z2D1M2K4yHRDfmGp4VzQnGhi3fN9KYb3q1orX7PhizQvm+kf6Hap8z8/BefTAw8mp4HsZeLCcYs2FsIB35I/UC60EOxXOCMRvGdteRP5JreB7kUDz3wZgNY+7I75/fm+I5wZgNY+7I75/fm+I5wZgNYx6q3T+/N8VzH4zZMDbSO/Ib8XtTPHmKrkxXV1d0d3e3uxhmZpUiaUVEdNVb5yYyMzMrhBOMmZkVwgnGzMwK4QRjZmaFcIIxM7NCOMGYmVkhnGDMzKwQTjBmZlaItiQYSeslrZJ0j6TuFJsgaZmktenr+Nz2CyStk7RG0sxcfEY6zjpJF0tSio+VtCjF75A0peUnaWY2wrWzBvOWiJiWuwN0PvDjiDgU+HH6HklHAHOANwCzgK9Lqk0gdAkwDzg0vWal+JnA1og4BLgIuKAF52PWFEtWbuCYhcuZOv8HHLNwOUtWbmh3kcz2SJmayE4CrkzLVwKzc/FrI+L5iHgEWAccJWkSsHdE3BbZfDdX9dmndqzrgBNqtRuzMqs9BGvD09sIdj0Ey0nGqqhdCSaAH0paIWleiu0fEZsA0tfXpHgn8Fhu354U60zLfeO99omIHcAzwL4FnIdZUzV6CJZZ1bRrNuVjImKjpNcAyyQ91GDbejWPaBBvtE/vA2fJbR7AQQcd1LjEZi3gh2DZcNKWGkxEbExfnwC+BxwFPJ6avUhfn0ib9wAH5nafDGxM8cl14r32kTQa2AfYUqccl0ZEV0R0TZw4sTknZzYEfgiWDSctTzCSXiXp1bVl4ETgfuBGYG7abC5wQ1q+EZiTRoZNJevMvzM1oz0r6ejUv3JGn31qxzoVWB5+LoFVgB+CZcNJO5rI9ge+l/rcRwPfiYh/k3QXsFjSmcAvgXcBRMRqSYuBB4AdwNkRUWukPgu4AugAlqYXwGXA1ZLWkdVc5rTixMyGyg/BsuHEDxxL/MAxM7PBa/TAMT8y2cyaYsnKDa55WS9OMGY2ZLX7d2pDrGv37wBOMiNYmW60NLOK8v07Vo8TjJkNme/fsXqcYMxsyHz/jtXjBGNmQ+b7d6wed/Kb2ZD5/h2rxwnGzJpi9vROJxTrxQmmCTz+38zspZxghsjj/83M6nMn/xB5/L+ZWX1OMEPk8f9mZvU5wQyRx/+bmdXnBDNEHv9vZlafO/mHyOP/zczqc4JpAo//NzN7KTeRmZlZIZxgzMysEE4wZmZWiJYnGEkHSvqJpAclrZb0kRQ/T9IGSfek1zty+yyQtE7SGkkzc/EZklaldRdLUoqPlbQoxe+QNKXV52lmNtK1owazA/hERLweOBo4W9IRad1FETEtvW4CSOvmAG8AZgFfl1QbF3wJMA84NL1mpfiZwNaIOAS4CLigBedlZmY5LU8wEbEpIu5Oy88CDwKNhmCdBFwbEc9HxCPAOuAoSZOAvSPitogI4Cpgdm6fK9PydcAJtdqNmZm1Rlv7YFLT1XTgjhT6O0n3Sbpc0vgU6wQey+3Wk2KdablvvNc+EbEDeAbYt87PnyepW1L35s2bm3NSZmYGtDHBSNoL+C7w0Yj4NVlz1+8B04BNwJdrm9bZPRrEG+3TOxBxaUR0RUTXxIkTB3cCZmbWUFsSjKQxZMnl2xFxPUBEPB4ROyPiBeCbwFFp8x7gwNzuk4GNKT65TrzX
PpJGA/sAW4o5GzMzq6cdo8gEXAY8GBFfycUn5TY7Gbg/Ld8IzEkjw6aSdebfGRGbgGclHZ2OeQZwQ26fuWn5VGB56qcxM7MWacdUMccA7wVWSbonxT4NnC5pGllT1nrgAwARsVrSYuABshFoZ0dE7QEsZwFXAB3A0vSCLIFdLWkdWc1lTqFnZGZmLyF/sM90dXVFd3d3u4thZlYpklZERFe9db6T38zMCuEEY2ZmhXCCMTOzQvh5MGYltmTlBj/MzirLCcaspJas3MCC61exbXs2aHLD09tYcP0qACcZqwQ3kZmV1IU3r3kxudRs276TC29e06YSmQ2OE4xZSW18etug4mZl4wRjVlIHjOsYVNysbJxgzErqnJmH0zFmVK9Yx5hRnDPz8DaVyGxw3MlvVlK1jnyPIrOqcoIxK7HZ0zudUKyy3ERmZmaFcIIxM7NCOMGYmVkhnGDMzKwQTjBmZlYIP3AskbQZeLTd5cjZD3iy3YXYA1UtN1S37C5361W17EWU++CImFhvhRNMSUnq7u8pcWVW1XJDdcvucrdeVcve6nK7iczMzArhBGNmZoVwgimvS9tdgD1U1XJDdcvucrdeVcve0nK7D8bMzArhGoyZmRXCCcbMzArhBFMwSeMkXSfpIUkPSvpjSRMkLZO0Nn0dn9t+gaR1ktZImpmLz5C0Kq27WJJSfKykRSl+h6QpTSr3xyStlnS/pGskvaKs5ZZ0uaQnJN2fi7WkrJLmpp+xVtLcJpT7wvS3cp+k70kaV4Vy59Z9UlJI2q9s5W5UdkkfSuVbLelLZSt7P38r0yTdLukeSd2SjipbuYkIvwp8AVcCf52WXw6MA74EzE+x+cAFafkI4F5gLDAV+AUwKq27E/hjQMBS4O0p/rfAN9LyHGBRE8rcCTwCdKTvFwPvK2u5geOAPwDuz8UKLyswAXg4fR2flscPsdwnAqPT8gVVKXeKHwjcTHbD8n5lK3eD9/wtwI+Asen715St7P2U+4e5n/sO4JbSlXuoFyO/Gv5R7E12oVaf+BpgUlqeBKxJywuABbntbk5/DJOAh3Lx04F/ym+TlkeT3aWrIZa7E3gs/VGNBr5PduErbbmBKX3++Qova36btO6fgNOHUu4+604Gvl2VcgPXAW8G1rMrwZSq3P38rSwG3lZnu1KVvU65bwZOy5XhO2Urt5vIivU6YDPwL5JWSvpnSa8C9o+ITQDp62vS9rULe01PinWm5b7xXvtExA7gGWDfoRQ6IjYA/wj8EtgEPBMRPyx7uftoRVn7O1azvJ/sU2bpyy3pncCGiLi3z6pSlzs5DDg2NQ39VNIfVqTsHwUulPQY2f/rgrKV2wmmWKPJqrWXRMR04DdkzTX9UZ1YNIg32mePKeuvOImsen0A8CpJ72m0Sz9laGm5B6iZZS3sHCR9BtgBfHsIZWhJuSW9EvgM8Nl6q/egDK1+v0eTNf8cDZwDLE59E2Uv+1nAxyLiQOBjwGVDKEMh5XaCKVYP0BMRd6TvryNLOI9LmgSQvj6R2/7A3P6TgY0pPrlOvNc+kkYD+wBbhljutwGPRMTmiNgOXA/8lwqUO68VZe3vWEOSOlL/HHh3pHaJkpf798g+jNwraX063t2SXlvyctf0ANdH5k7gBbJJIcte9rlk/5sA/wrUOvnLU+7Btl/6Nej23n8HDk/L5wEXple+A/pLafkN9O6ce5hdnXN3kX3CqnXOvSPFz6Z359ziJpT5j4DVwCvTz7sS+FCZy81L26cLLytZH9UjZJ9+x6flCUMs9yzgAWBin+1KXe4+69azqw+mVOXu5z3/IHB+Wj6MrElIZSt7nXI/CByflk8AVpTtPW/7BXi4v4BpQDdwH7Ak/ZL2BX4MrE1fJ+S2/wzZqI81pBEeKd4F3J/WfY1dszC8guzTyzqyESKva1K5Pwc8lH7m1emPtZTlBq4h6yvaTvaJ68xWlZWsn2Rdev1VE8q9juwCd096faMK5e6zfj0pwZSp3A3e85cD
30pluRt4a9nK3k+5/wRYQZZM7gBmlK3cnirGzMwK4T4YMzMrhBOMmZkVwgnGzMwK4QRjZmaFcIIxM7NCOMGY9UPSyWlm4N/vZ/0tkroGcbwuSRcPYLv/3M36Tw/0Z/bZ76Pprvva9zcpN1uzWbN5mLJZPyQtJpsg8McRcV6d9bcAn4yI7haX67mI2KtOXGT/0y/0s996oCsiniy4iGaAazBmdUnaCziG7Ia2OSnWIelaZc9qWQR05LZ/TtIFklZI+pGko1IN5+E0ESSSjpf0/bR8XnrGR22bD+ePlb5OknRret7H/ZKOlbQQ6Eixb0uaouw5Q18nu0nwQEmXpOeDrJb0uXSsD5PNK/cTST9JsfVKz22R9PH0M+6X9NEUqx37m+lYP5T04jmb7dae3j3tl1/D+QW8B7gsLf8n2RxyHwcuT7E3kU1G2ZW+D3Y9W+N7ZM/qGEM2ff09KX488P20fF467liyea+eAsakdc+lr58APpOWRwGvzq9Py1PI5s46OhebkNvnFuBN6fv19L7Dfn362TOAVcCrgL3Ipgmano69A5iWtl8MvKfdvxu/qvNyDcasvtOBa9Pyten748imFCEi7iOb/qfm/wH/lpZXAT+NbKLQVWQX6np+EBHPR9Zk9QSwf5/1dwF/Jek84MiIeLaf4zwaEbfnvv8LSXcDK8nmpTqiwXlCNuXI9yLiNxHxHNkEisemdY9ExD1peUWDczF7idHtLoBZ2UjaF3gr8EZJQVYTCLILdn+dltsjorbuBeB5gIh4Ic1OW8/zueWd9Pl/jIhbJR0H/BlwtaQLI+KqOsf5Ta7sU4FPAn8YEVslXUE2z1Qj9aZk76+MbiKzAXMNxuylTgWuioiDI2JKZM/beISsj+PdAJLeSNZMVhhJBwNPRMQ3yZ718Qdp1XZJY/rZbW+yhPOMpP2Bt+fWPQu8us4+twKzJb0yPRDvZLJZwM2GxDUYs5c6HVjYJ/Zdsn6JDkn3kc10fGfB5TgeOEfSduA54IwUvxS4LzWDfSa/Q0TcK2klWT/Kw8DPcqsvBZZK2hQRb8ntc3eq6dTO558jYqWkKc0/JRtJPEzZzMwK4SYyMzMrhBOMmZkVwgnGzMwK4QRjZmaFcIIxM7NCOMGYmVkhnGDMzKwQ/x/J+3ImvK9sZwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(df['Administration'], df['Profit'])\n", + "#plt.plot(X_train, regressor.predict(X_train), color = 'blue')\n", + "plt.title('Administration vs Profit')\n", + "plt.xlabel('Administration')\n", + "plt.ylabel('Profit')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'State')" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" ] }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = df.groupby(['State'])['Profit'].mean().plot.bar(\n", + " figsize = (10,5))\n", + "ax.set_title(\"Average profit for different states where the startups operate\")\n", + "# Label both axes: x is the state, y is the mean Profit of that state's startups\n", + "ax.set_xlabel(\"State\")\n", + "ax.set_ylabel(\"Average profit\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "New York 17\n", + "California 17\n", + "Florida 16\n", + "Name: State, dtype: int64" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.State.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [], + "source": [ + "df['NewYork_State'] = np.where(df['State']=='New York',1,0)\n", + "df['California_State'] = np.where(df['State']=='California',1,0)\n", + "df['Florida_State'] = np.where(df['State']=='Florida',1,0)\n", + "# The text column is redundant once it has been encoded as the three 0/1 indicator columns above\n", + "df.drop(columns=['State'],axis=1,inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
R&D SpendAdministrationMarketing SpendProfitNewYork_StateCalifornia_StateFlorida_State
0165349.20136897.80471784.10192261.83100
1162597.70151377.59443898.53191792.06010
2153441.51101145.55407934.54191050.39001
3144372.41118671.85383199.62182901.99100
4142107.3491391.77366168.42166187.94001
\n", + "
" + ], + "text/plain": [ + " R&D Spend Administration Marketing Spend Profit NewYork_State \\\n", + "0 165349.20 136897.80 471784.10 192261.83 1 \n", + "1 162597.70 151377.59 443898.53 191792.06 0 \n", + "2 153441.51 101145.55 407934.54 191050.39 0 \n", + "3 144372.41 118671.85 383199.62 182901.99 1 \n", + "4 142107.34 91391.77 366168.42 166187.94 0 \n", + "\n", + " California_State Florida_State \n", + "0 0 0 \n", + "1 1 0 \n", + "2 0 1 \n", + "3 0 0 \n", + "4 0 1 " + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [], + "source": [ + "dependent_variable = 'Profit'" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['R&D Spend',\n", + " 'Administration',\n", + " 'Marketing Spend',\n", + " 'NewYork_State',\n", + " 'California_State',\n", + " 'Florida_State']" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "independent_variables = df.columns.tolist()\n", + "independent_variables.remove(dependent_variable)\n", + "independent_variables" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [], + "source": [ + "X = df[independent_variables].values\n", + "y = df[dependent_variable].values" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1.6534920e+05 1.3689780e+05 4.7178410e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.6259770e+05 1.5137759e+05 4.4389853e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.5344151e+05 1.0114555e+05 4.0793454e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [1.4437241e+05 1.1867185e+05 3.8319962e+05 1.0000000e+00 0.0000000e+00\n", + 
" 0.0000000e+00]\n", + " [1.4210734e+05 9.1391770e+04 3.6616842e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [1.3187690e+05 9.9814710e+04 3.6286136e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.3461546e+05 1.4719887e+05 1.2771682e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.3029813e+05 1.4553006e+05 3.2387668e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [1.2054252e+05 1.4871895e+05 3.1161329e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.2333488e+05 1.0867917e+05 3.0498162e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.0191308e+05 1.1059411e+05 2.2916095e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [1.0067196e+05 9.1790610e+04 2.4974455e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [9.3863750e+04 1.2732038e+05 2.4983944e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [9.1992390e+04 1.3549507e+05 2.5266493e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.1994324e+05 1.5654742e+05 2.5651292e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [1.1452361e+05 1.2261684e+05 2.6177623e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [7.8013110e+04 1.2159755e+05 2.6434606e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [9.4657160e+04 1.4507758e+05 2.8257431e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [9.1749160e+04 1.1417579e+05 2.9491957e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [8.6419700e+04 1.5351411e+05 0.0000000e+00 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [7.6253860e+04 1.1386730e+05 2.9866447e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [7.8389470e+04 1.5377343e+05 2.9973729e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [7.3994560e+04 1.2278275e+05 3.0331926e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [6.7532530e+04 1.0575103e+05 
3.0476873e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [7.7044010e+04 9.9281340e+04 1.4057481e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [6.4664710e+04 1.3955316e+05 1.3796262e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [7.5328870e+04 1.4413598e+05 1.3405007e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [7.2107600e+04 1.2786455e+05 3.5318381e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [6.6051520e+04 1.8264556e+05 1.1814820e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [6.5605480e+04 1.5303206e+05 1.0713838e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [6.1994480e+04 1.1564128e+05 9.1131240e+04 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [6.1136380e+04 1.5270192e+05 8.8218230e+04 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [6.3408860e+04 1.2921961e+05 4.6085250e+04 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [5.5493950e+04 1.0305749e+05 2.1463481e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [4.6426070e+04 1.5769392e+05 2.1079767e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [4.6014020e+04 8.5047440e+04 2.0551764e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [2.8663760e+04 1.2705621e+05 2.0112682e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [4.4069950e+04 5.1283140e+04 1.9702942e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [2.0229590e+04 6.5947930e+04 1.8526510e+05 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [3.8558510e+04 8.2982090e+04 1.7499930e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [2.8754330e+04 1.1854605e+05 1.7279567e+05 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [2.7892920e+04 8.4710770e+04 1.6447071e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [2.3640930e+04 9.6189630e+04 1.4800111e+05 0.0000000e+00 1.0000000e+00\n", + " 
0.0000000e+00]\n", + " [1.5505730e+04 1.2738230e+05 3.5534170e+04 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [2.2177740e+04 1.5480614e+05 2.8334720e+04 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.0002300e+03 1.2415304e+05 1.9039300e+03 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [1.3154600e+03 1.1581621e+05 2.9711446e+05 0.0000000e+00 0.0000000e+00\n", + " 1.0000000e+00]\n", + " [0.0000000e+00 1.3542692e+05 0.0000000e+00 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]\n", + " [5.4205000e+02 5.1743150e+04 0.0000000e+00 1.0000000e+00 0.0000000e+00\n", + " 0.0000000e+00]\n", + " [0.0000000e+00 1.1698380e+05 4.5173060e+04 0.0000000e+00 1.0000000e+00\n", + " 0.0000000e+00]]\n", + "[192261.83 191792.06 191050.39 182901.99 166187.94 156991.12 156122.51\n", + " 155752.6 152211.77 149759.96 146121.95 144259.4 141585.52 134307.35\n", + " 132602.65 129917.04 126992.93 125370.37 124266.9 122776.86 118474.03\n", + " 111313.02 110352.25 108733.99 108552.04 107404.34 105733.54 105008.31\n", + " 103282.38 101004.64 99937.59 97483.56 97427.84 96778.92 96712.8\n", + " 96479.51 90708.19 89949.14 81229.06 81005.76 78239.91 77798.83\n", + " 71498.49 69758.98 65200.33 64926.08 49490.75 42559.73 35673.41\n", + " 14681.4 ]\n" + ] + } + ], + "source": [ + "print(X)\n", + "print(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Splitting the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.00327821, 0.00444355, 0. , 1. , 0. ,\n", + " 0. ],\n", + " [0.39676926, 0.98286294, 0.22709197, 1. , 0. ,\n", + " 0. 
],\n", + " [0.69261666, 0.68906137, 0.55486446, 1. , 0. ,\n", + " 0. ],\n", + " [0.37493063, 0.62167963, 0.19316302, 0. , 0. ,\n", + " 1. ]])" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import MinMaxScaler\n", + "sc = MinMaxScaler()\n", + "X_train = sc.fit_transform(X_train)\n", + "X_test = sc.transform(X_test)\n", + "#print(X_train)\n", + "#print(X_test)\n", + "X_train[1:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multiple Linear Regression Model" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "regressor = LinearRegression()\n", + "regressor.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = regressor.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "102" + ] + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test.size\n" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "17" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred.size" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "198" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train.size\n" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "33" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.size" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "17" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.size" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.39946683, 1.26892014, 0.25042853, 0. , 0. ,\n", + " 1. ],\n", + " [0.60884455, 0.39128957, 0.52936195, 0. , 1. ,\n", + " 0. ],\n", + " [0.61635061, 0.57292553, 0.48573267, 0. , 0. ,\n", + " 1. ],\n", + " [0.16869099, 0.32290051, 0.34861436, 0. , 0. ,\n", + " 1. ],\n", + " [0.92798459, 0.48165538, 0.8646636 , 0. , 0. ,\n", + " 1. ],\n", + " [0.43609283, 0.73975262, 0.74861321, 1. , 0. ,\n", + " 0. ],\n", + " [0.12234465, 0.14165731, 0.39269043, 1. , 0. ,\n", + " 0. ],\n", + " [0.36974101, 0.97967389, 0.18698856, 1. , 0. ,\n", + " 0. ],\n", + " [0.4475048 , 0.69066401, 0.64291963, 0. , 0. ,\n", + " 1. ],\n", + " [0.85943772, 0.3874369 , 0.77613557, 0. , 0. ,\n", + " 1. ],\n", + " [0.33561668, 0.50012413, 0.45494286, 0. , 0. ,\n", + " 1. ],\n", + " [0.2782839 , 0.32615264, 0.43561799, 1. , 0. ,\n", + " 0. ],\n", + " [0.45557444, 0.89692957, 0.28413435, 0. , 0. ,\n", + " 1. ],\n", + " [0.2807759 , 1.02789506, 0.44680961, 0. , 1. ,\n", + " 0. ],\n", + " [0.55488118, 0.60752345, 0.62511553, 0. , 0. ,\n", + " 1. ],\n", + " [0.7880179 , 0.91039595, 0.68649342, 0. , 0. ,\n", + " 1. ],\n", + " [0.72539353, 1.01682022, 0.54370828, 0. , 0. ,\n", + " 1. 
]])" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([111616.64259451, 132709.39466316, 140155.11033793, 76099.20398184,\n", + " 186329.94240372, 112822.19807255, 63002.00394804, 99107.10428091,\n", + " 119287.75473383, 175522.83864739, 101000.698615 , 85772.99293235,\n", + " 117713.76481525, 90230.88085201, 133375.04389452, 167530.54765828,\n", + " 158013.54602063])" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([103282.38, 144259.4 , 146121.95, 77798.83, 191050.39, 105008.31,\n", + " 81229.06, 97483.56, 110352.25, 166187.94, 96778.92, 96479.51,\n", + " 105733.54, 96712.8 , 124266.9 , 155752.6 , 132602.65])" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [], + "source": [ + "#regressor.predict([[165349.20],[136897.80],[471784.10],[1,0,0]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Amrin_ML.md b/Amrin_ML.md new file mode 100644 index 000000000..1c61a7f6a --- /dev/null +++ b/Amrin_ML.md @@ -0,0 +1,9 @@ +From this module we have learned: + + What is Machine Learning? + Different types of Machine Learning. Pros and Cons + What is Regression and Classification? Difference between them. + Types of Regression. Linear VS Logistic Regression. + Types of Classification. Classification Problem Evaluation Metrics + What is Clustering? K-Means and Hierarchical Clustering theory and algorithm. + Assignment: Concrete Compressive Strength Prediction using Machine Learning (Linear Regression)