diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..957ca2d 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -42,7 +42,12 @@ "#Libraries\n", "import pandas as pd\n", "import numpy as np\n", - "from sklearn.model_selection import train_test_split" + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", + "from sklearn.model_selection import GridSearchCV" ] }, { @@ -178,26 +183,14 @@ "" ], "text/plain": [ - " PassengerId HomePlanet CryoSleep Cabin Destination Age VIP \\\n", - "0 0001_01 Europa False B/0/P TRAPPIST-1e 39.0 False \n", - "1 0002_01 Earth False F/0/S TRAPPIST-1e 24.0 False \n", - "2 0003_01 Europa False A/0/S TRAPPIST-1e 58.0 True \n", - "3 0003_02 Europa False A/0/S TRAPPIST-1e 33.0 False \n", - "4 0004_01 Earth False F/1/S TRAPPIST-1e 16.0 False \n", - "\n", - " RoomService FoodCourt ShoppingMall Spa VRDeck Name \\\n", - "0 0.0 0.0 0.0 0.0 0.0 Maham Ofracculy \n", - "1 109.0 9.0 25.0 549.0 44.0 Juanna Vines \n", - "2 43.0 3576.0 0.0 6715.0 49.0 Altark Susent \n", - "3 0.0 1283.0 371.0 3329.0 193.0 Solam Susent \n", - "4 303.0 70.0 151.0 565.0 2.0 Willy Santantines \n", - "\n", - " Transported \n", - "0 False \n", - "1 True \n", - "2 False \n", - "3 False \n", - "4 True " + " PassengerId HomePlanet CryoSleep ... VRDeck Name Transported\n", + "0 0001_01 Europa False ... 0.0 Maham Ofracculy False\n", + "1 0002_01 Earth False ... 44.0 Juanna Vines True\n", + "2 0003_01 Europa False ... 49.0 Altark Susent False\n", + "3 0003_02 Europa False ... 193.0 Solam Susent False\n", + "4 0004_01 Earth False ... 
2.0 Willy Santantines True\n", + "\n", + "[5 rows x 14 columns]" ] }, "execution_count": 2, @@ -221,11 +214,287 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "spaceship = spaceship.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Cabin\n", + "F 2152\n", + "G 1973\n", + "E 683\n", + "B 628\n", + "C 587\n", + "D 374\n", + "A 207\n", + "T 2\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spaceship['Cabin'] = spaceship['Cabin'].str[0]\n", + "spaceship['Cabin'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAHFCAYAAAADhKhmAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAK6lJREFUeJzt3X9w1PWdx/HXGpI1QPKVANnNnmsIgpQ0gDZ4IZSDKJGAxmh1ChqbQuUADwRTQBSdXrHtEcVBUFGGox4ooOC1gm2hkVAkLUL4EY0FmlIUOMKYJYBhAxgTCN/7w/E7XYJoMLD5JM/HzM6w333vN5+v7crT735347Jt2xYAAIBhrgr3AgAAAC4FEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAI7UL9wIul3PnzumTTz5RTEyMXC5XuJcDAAC+Adu2dfLkSfl8Pl111cXPtbTaiPnkk0/k9/vDvQwAAHAJKioqdO211150ptVGTExMjKQv/iHExsaGeTUAAOCbqKmpkd/vd/4ev5hWGzFfvoUUGxtLxAAAYJhvcikIF/YCAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADBSu3AvAM2v2+Nrw70EXEEHn74j3EsAgLDgTAwAADASEQMAAIxExAAAACMRMQAAwEhc2AsABuHC/baFC/cvjjMxAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASE2KmIULF6pv376KjY1VbGys0tPT9cc//tF53LZtzZo1Sz6
fT9HR0crIyNCePXtC9lFXV6fJkyerS5cu6tChg3JycnT48OGQmerqauXl5cmyLFmWpby8PJ04ceLSjxIAALQ6TYqYa6+9Vk8//bR27typnTt36tZbb9Vdd93lhMqcOXP03HPPacGCBdqxY4e8Xq9uu+02nTx50tlHfn6+Vq9erZUrV2rz5s06deqUsrOz1dDQ4Mzk5uaqrKxMhYWFKiwsVFlZmfLy8prpkAEAQGvgsm3b/jY7iIuL07PPPqsHH3xQPp9P+fn5euyxxyR9cdbF4/HomWee0YQJExQMBtW1a1ctW7ZMo0aNkiR98skn8vv9WrdunbKyslReXq7k5GSVlJQoLS1NklRSUqL09HT9/e9/V69evS64jrq6OtXV1Tn3a2pq5Pf7FQwGFRsb+20O0TjdHl8b7iXgCjr49B3hXgKuIF7fbUtbfH3X1NTIsqxv9Pf3JV8T09DQoJUrV+r06dNKT0/XgQMHFAgENGzYMGfG7XZryJAh2rJliySptLRUZ86cCZnx+XxKSUlxZrZu3SrLspyAkaQBAwbIsixn5kIKCgqct58sy5Lf77/UQwMAAAZocsTs2rVLHTt2lNvt1kMPPaTVq1crOTlZgUBAkuTxeELmPR6P81ggEFBUVJQ6dep00Zn4+PhGPzc+Pt6ZuZCZM2cqGAw6t4qKiqYeGgAAMEi7pj6hV69eKisr04kTJ/Tb3/5Wo0ePVnFxsfO4y+UKmbdtu9G2850/c6H5r9uP2+2W2+3+pocBAAAM1+QzMVFRUerRo4f69++vgoIC9evXT88//7y8Xq8kNTpbUlVV5Zyd8Xq9qq+vV3V19UVnjhw50ujnHj16tNFZHgAA0HZ96++JsW1bdXV1SkpKktfrVVFRkfNYfX29iouLNXDgQElSamqqIiMjQ2YqKyu1e/duZyY9PV3BYFDbt293ZrZt26ZgMOjMAAAANOntpCeeeEIjRoyQ3+/XyZMntXLlSm3atEmFhYVyuVzKz8/X7Nmz1bNnT/Xs2VOzZ89W+/btlZubK0myLEtjx47VtGnT1LlzZ8XFxWn69Onq06ePMjMzJUm9e/fW8OHDNW7cOC1atEiSNH78eGVnZ3/lJ5MAAEDb06SIOXLkiPLy8lRZWSnLstS3b18VFhbqtttukyTNmDFDtbW1mjhxoqqrq5WWlqb169crJibG2ce8efPUrl07jRw5UrW1tRo6dKiWLl2qiIgIZ2bFihWaMmWK8ymmnJwcLViwoDmOFwAAtBLf+ntiWqqmfM68teF7JNqWtvg9Em0Zr++2pS2+vq/I98QAAACEExEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIzUpIgpKCjQzTffrJiYGMXHx+vuu+/W3r17Q2bGjBkjl8sVchswYEDITF1dnSZPnqwuXbqoQ4cOysnJ0eHDh0NmqqurlZeXJ8uyZFmW8vLydOLEiUs7SgAA0Oo0KWKKi4s1adIklZSUqKioSGfPntWwYcN0+vTpkLnhw4ersrLSua1bty7k8fz8fK1evVorV67U5s2bderUKWVnZ6uhocGZyc3NVVlZmQoLC1VYWKiysjLl5eV9i0MFAACtSbumDBcWFobcX7JkieLj41VaWqrBgwc7291ut7xe7wX3EQwG9corr2jZsmXKzMyUJC1fvlx+v18bNmxQVlaWysvLVVhYqJKSEqWlpUmSFi9erPT0dO3du1e9evVqtN+6ujr
V1dU592tqappyaAAAwDDf6pqYYDAoSYqLiwvZvmnTJsXHx+uGG27QuHHjVFVV5TxWWlqqM2fOaNiwYc42n8+nlJQUbdmyRZK0detWWZblBIwkDRgwQJZlOTPnKygocN56sixLfr//2xwaAABo4S45Ymzb1tSpUzVo0CClpKQ420eMGKEVK1Zo48aNmjt3rnbs2KFbb73VOUsSCAQUFRWlTp06hezP4/EoEAg4M/Hx8Y1+Znx8vDNzvpkzZyoYDDq3ioqKSz00AABggCa9nfTPHn74Yf31r3/V5s2bQ7aPGjXK+XNKSor69++vxMRErV27Vvfcc89X7s+2bblcLuf+P//5q2b+mdvtltvtbuphAAAAQ13SmZjJkyfrd7/7nd59911de+21F51NSEhQYmKi9u3bJ0nyer2qr69XdXV1yFxVVZU8Ho8zc+TIkUb7Onr0qDMDAADatiZFjG3bevjhh/XWW29p48aNSkpK+trnHD9+XBUVFUpISJAkpaamKjIyUkVFRc5MZWWldu/erYEDB0qS0tPTFQwGtX37dmdm27ZtCgaDzgwAAGjbmvR20qRJk/T666/r7bffVkxMjHN9imVZio6O1qlTpzRr1izde++9SkhI0MGDB/XEE0+oS5cu+sEPfuDMjh07VtOmTVPnzp0VFxen6dOnq0+fPs6nlXr37q3hw4dr3LhxWrRokSRp/Pjxys7OvuAnkwAAQNvTpIhZuHChJCkjIyNk+5IlSzRmzBhFRERo165deu2113TixAklJCTolltu0apVqxQTE+PMz5s3T+3atdPIkSNVW1uroUOHaunSpYqIiHBmVqxYoSlTpjifYsrJydGCBQsu9TgBAEAr06SIsW37oo9HR0frnXfe+dr9XH311XrxxRf14osvfuVMXFycli9f3pTlAQCANoTfnQQAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjNSkiCkoKNDNN9+smJgYxcfH6+6779bevXtDZmzb1qxZs+Tz+RQdHa2MjAzt2bMnZKaurk6TJ09Wly5d1KFDB+Xk5Ojw4cMhM9XV1crLy5NlWbIsS3l5eTpx4sSlHSUAAGh1mhQxxcXFmjRpkkpKSlRUVKSzZ89q2LBhOn36tDMzZ84cPffcc1qwYIF27Nghr9er2267TSdPnnRm8vPztXr1aq1cuVKbN2/WqVOnlJ2drYaGBmcmNzdXZWVlKiwsVGFhocrKypSXl9cMhwwAAFoDl23b9qU++ejRo4qPj1dxcbEGDx4s27bl8/mUn5+vxx57TNIXZ108Ho+eeeYZTZgwQcFgUF27dtWyZcs0atQoSdInn3wiv9+vdevWKSsrS+Xl5UpOTlZJSYnS0tIkSSUlJUpPT9ff//539erV62vXVlNTI8uyFAwGFRsbe6mHaKRuj68N9xJwBR18+o5wLwFXEK/vtqUtvr6b8vf3t7omJhgMSpLi4uIkSQcOHFAgENCwYcOcGbfbrSFDhmjLli2SpNLSUp05cyZkxufzKSUlxZnZunWrLMtyAkaSBgwYIMuynJnz1dXVqaamJuQGAABar0uOGNu2NXXqVA0aNEgpKSmSpEAgIEnyeDwhsx6Px3ksEAgoKipKnTp1uuhMfHx8o58ZHx/vzJyvoKDAuX7Gsiz5/f5LPTQAAGCAS46Yhx9+WH/961/1xhtvNHrM5XKF3Ldtu9G2850/c6H5i+1n5syZCgaDzq2iouK
bHAYAADDUJUXM5MmT9bvf/U7vvvuurr32Wme71+uVpEZnS6qqqpyzM16vV/X19aqurr7ozJEjRxr93KNHjzY6y/Mlt9ut2NjYkBsAAGi9mhQxtm3r4Ycf1ltvvaWNGzcqKSkp5PGkpCR5vV4VFRU52+rr61VcXKyBAwdKklJTUxUZGRkyU1lZqd27dzsz6enpCgaD2r59uzOzbds2BYNBZwYAALRt7ZoyPGnSJL3++ut6++23FRMT45xxsSxL0dHRcrlcys/P1+zZs9WzZ0/17NlTs2fPVvv27ZWbm+vMjh07VtOmTVPnzp0VFxen6dOnq0+fPsrMzJQk9e7dW8OHD9e4ceO0aNEiSdL48eOVnZ39jT6ZBAAAWr8mRczChQslSRkZGSHblyxZojFjxkiSZsyYodraWk2cOFHV1dVKS0vT+vXrFRMT48zPmzdP7dq108iRI1VbW6uhQ4dq6dKlioiIcGZWrFihKVOmOJ9iysnJ0YIFCy7lGAEAQCv0rb4npiXje2LQVrTF75Foy3h9ty1t8fV9xb4nBgAAIFyIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgpCZHzJ///Gfdeeed8vl8crlcWrNmTcjjY8aMkcvlCrkNGDAgZKaurk6TJ09Wly5d1KFDB+Xk5Ojw4cMhM9XV1crLy5NlWbIsS3l5eTpx4kSTDxAAALROTY6Y06dPq1+/flqwYMFXzgwfPlyVlZXObd26dSGP5+fna/Xq1Vq5cqU2b96sU6dOKTs7Ww0NDc5Mbm6uysrKVFhYqMLCQpWVlSkvL6+pywUAAK1Uu6Y+YcSIERoxYsRFZ9xut7xe7wUfCwaDeuWVV7Rs2TJlZmZKkpYvXy6/368NGzYoKytL5eXlKiwsVElJidLS0iRJixcvVnp6uvbu3atevXo1ddkAAKCVuSzXxGzatEnx8fG64YYbNG7cOFVVVTmPlZaW6syZMxo2bJizzefzKSUlRVu2bJEkbd26VZZlOQEjSQMGDJBlWc7M+erq6lRTUxNyAwAArVezR8yIESO0YsUKbdy4UXPnztWOHTt06623qq6uTpIUCAQUFRWlTp06hTzP4/EoEAg4M/Hx8Y32HR8f78ycr6CgwLl+xrIs+f3+Zj4yAADQkjT57aSvM2rUKOfPKSkp6t+/vxITE7V27Vrdc889X/k827blcrmc+//856+a+WczZ87U1KlTnfs1NTWEDAAArdhl/4h1QkKCEhMTtW/fPkmS1+tVfX29qqurQ+aqqqrk8XicmSNHjjTa19GjR52Z87ndbsXGxobcAABA63XZI+b48eOqqKhQQkKCJCk1NVWRkZEqKipyZiorK7V7924NHDhQkpSenq5gMKjt27c7M9u2bVMwGHRmAABA29bkt5NOnTqljz76yLl/4MABlZWVKS4uTnFxcZo1a5buvfdeJSQk6ODBg3riiSfUpUsX/eAHP5AkWZalsWPHatq0aercubPi4uI0ffp09enTx/m0Uu/evTV8+HCNGzdOixYtkiSNHz9e2dnZfDIJAABIuoSI2blzp2655Rbn/pfXoYwePVoLFy7Url279Nprr+nEiRNKSEjQLbfcolWrVikmJsZ5zrx589SuXTuNHDlStbW1Gjp0qJYuXaqIiAhnZsWKFZoyZYrzKaacnJyLfjcNAABoW1y2bdvhXsTlUFNTI8uyFAwG29z1Md0eXxvuJeAKOvj0HeF
eAq4gXt9tS1t8fTfl729+dxIAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjETEAAAAIxExAADASEQMAAAwEhEDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMBIRAwAAjNTkiPnzn/+sO++8Uz6fTy6XS2vWrAl53LZtzZo1Sz6fT9HR0crIyNCePXtCZurq6jR58mR16dJFHTp0UE5Ojg4fPhwyU11drby8PFmWJcuylJeXpxMnTjT5AAEAQOvU5Ig5ffq0+vXrpwULFlzw8Tlz5ui5557TggULtGPHDnm9Xt122206efKkM5Ofn6/Vq1dr5cqV2rx5s06dOqXs7Gw1NDQ4M7m5uSorK1NhYaEKCwtVVlamvLy8SzhEAADQGrVr6hNGjBihESNGXPAx27Y1f/58Pfnkk7rnnnskSa+++qo8Ho9ef/11TZgwQcFgUK+88oqWLVumzMxMSdLy5cvl9/u1YcMGZWVlqby8XIWFhSopKVFaWpokafHixUpPT9fevXvVq1evSz1eAADQSjTrNTEHDhxQIBDQsGHDnG1ut1tDhgzRli1bJEmlpaU6c+ZMyIzP51NKSoozs3XrVlmW5QSMJA0YMECWZTkz56urq1NNTU3IDQAAtF7NGjGBQECS5PF4QrZ7PB7nsUAgoKioKHXq1OmiM/Hx8Y32Hx8f78ycr6CgwLl+xrIs+f3+b308AACg5bosn05yuVwh923bbrTtfOfPXGj+YvuZOXOmgsGgc6uoqLiElQMAAFM0a8R4vV5JanS2pKqqyjk74/V6VV9fr+rq6ovOHDlypNH+jx492ugsz5fcbrdiY2NDbgAAoPVq1ohJSkqS1+tVUVGRs62+vl7FxcUaOHCgJCk1NVWRkZEhM5WVldq9e7czk56ermAwqO3btzsz27ZtUzAYdGYAAEDb1uRPJ506dUofffSRc//AgQMqKytTXFycrrvuOuXn52v27Nnq2bOnevbsqdmzZ6t9+/bKzc2VJFmWpbFjx2ratGnq3Lmz4uLiNH36dPXp08f5tFLv3r01fPhwjRs3TosWLZIkjR8/XtnZ2XwyCQAASLqEiNm5c6duueUW5/7UqVMlSaNHj9bSpUs1Y8YM1dbWauLEiaqurlZaWprWr1+vmJgY5znz5s1Tu3btNHLkSNXW1mro0KFaunSpIiIinJkVK1ZoypQpzqeYcnJyvvK7aQAAQNvjsm3bDvciLoeamhpZlqVgMNjmro/p9vjacC8BV9DBp+8I9xJwBfH6blva4uu7KX9/87uTAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGCkZo+YWbNmyeVyhdy8Xq/zuG3bmjVrlnw+n6Kjo5WRkaE9e/aE7KOurk6TJ09Wly5d1KFDB+Xk5Ojw4cPNvVQAAGCwy3Im5rvf/a4qKyud265du5zH5syZo+eee04LFizQjh075PV6ddttt+nkyZPOTH5+vlavXq2VK1dq8+bNOnXqlLKzs9XQ0HA5lgs
AAAzU7rLstF27kLMvX7JtW/Pnz9eTTz6pe+65R5L06quvyuPx6PXXX9eECRMUDAb1yiuvaNmyZcrMzJQkLV++XH6/Xxs2bFBWVtblWDIAADDMZTkTs2/fPvl8PiUlJem+++7T/v37JUkHDhxQIBDQsGHDnFm3260hQ4Zoy5YtkqTS0lKdOXMmZMbn8yklJcWZuZC6ujrV1NSE3AAAQOvV7BGTlpam1157Te+8844WL16sQCCggQMH6vjx4woEApIkj8cT8hyPx+M8FggEFBUVpU6dOn3lzIUUFBTIsizn5vf7m/nIAABAS9LsETNixAjde++96tOnjzIzM7V27VpJX7xt9CWXyxXyHNu2G20739fNzJw5U8Fg0LlVVFR8i6MAAAAt3WX/iHWHDh3Up08f7du3z7lO5vwzKlVVVc7ZGa/Xq/r6elVXV3/lzIW43W7FxsaG3AAAQOt12SOmrq5O5eXlSkhIUFJSkrxer4qKipzH6+vrVVxcrIEDB0qSUlNTFRkZGTJTWVmp3bt3OzMAAADN/umk6dOn684779R1112nqqoq/epXv1JNTY1Gjx4tl8ul/Px8zZ49Wz179lTPnj01e/ZstW/fXrm5uZIky7I0duxYTZs2TZ07d1ZcXJymT5/uvD0FAAAgXYaIOXz4sO6//34dO3ZMXbt21YABA1RSUqLExERJ0owZM1RbW6uJEyequrpaaWlpWr9+vWJiYpx9zJs3T+3atdPIkSNVW1uroUOHaunSpYqIiGju5QIAAEO5bNu2w72Iy6GmpkaWZSkYDLa562O6Pb423EvAFXTw6TvCvQRcQby+25a2+Ppuyt/f/O4kAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABiJiAEAAEYiYgAAgJGIGAAAYCQiBgAAGImIAQAARiJiAACAkYgYAABgJCIGAAAYiYgBAABGImIAAICRiBgAAGAkIgYAABipxUfMyy+/rKSkJF199dVKTU3VX/7yl3AvCQAAtAAtOmJWrVql/Px8Pfnkk/rggw/0b//2bxoxYoQOHToU7qUBAIAwa9ER89xzz2ns2LH693//d/Xu3Vvz58+X3+/XwoULw700AAAQZu3CvYCvUl9fr9LSUj3++OMh24cNG6YtW7Y0mq+rq1NdXZ1zPxgMSpJqamou70JboHN1n4V7CbiC2uL/x9syXt9tS1t8fX95zLZtf+1si42YY8eOqaGhQR6PJ2S7x+NRIBBoNF9QUKCnnnqq0Xa/33/Z1gi0BNb8cK8AwOXSll/fJ0+elGVZF51psRHzJZfLFXLftu1G2yRp5syZmjp1qnP/3Llz+vTTT9W5c+cLzqN1qampkd/vV0VFhWJjY8O9HADNiNd322Lbtk6ePCmfz/e1sy02Yrp06aKIiIhGZ12qqqoanZ2RJLfbLbfbHbLtmmuuuZxLRAsUGxvLv+SAVorXd9vxdWdgvtRiL+yNiopSamqqioqKQrYXFRVp4MCBYVoVAABoKVrsmRhJmjp1qvLy8tS/f3+lp6frv//7v3Xo0CE99NBD4V4aAAAIsxYdMaNGjdLx48f1i1/8QpWVlUpJSdG6deuUmJgY7qWhhXG73fr5z3/e6C1FAObj9Y2v4rK/yWeYAAAAWpgWe00MAADAxRAxAADASEQMAAAwEhEDAACMRMQAAAAjETEw1rJly/T9739fPp9P//d//ydJmj9/vt5+++0wrwwAcCUQMTDSwoULNXXqVN1+++06ceKEGhoaJH3xqybmz58f3sUBaFb19fXau3evzp49G+6loIUhYmCkF198UYsXL9aTTz6
piIgIZ3v//v21a9euMK4MQHP57LPPNHbsWLVv317f/e53dejQIUnSlClT9PTTT4d5dWgJiBgY6cCBA7rpppsabXe73Tp9+nQYVgSguc2cOVMffvihNm3apKuvvtrZnpmZqVWrVoVxZWgpiBgYKSkpSWVlZY22//GPf1RycvKVXxCAZrdmzRotWLBAgwYNksvlcrYnJyfr448/DuPK0FK06N+dBHyVRx99VJMmTdLnn38u27a1fft2vfHGGyooKNCvf/3rcC8PQDM4evSo4uPjG20/ffp0SNSg7SJiYKSf/OQnOnv2rGbMmKHPPvtMubm5+pd/+Rc9//zzuu+++8K9PADN4Oabb9batWs1efJkSXLCZfHixUpPTw/n0tBC8AsgYbxjx47p3LlzF/wvNgDm2rJli4YPH64HHnhAS5cu1YQJE7Rnzx5t3bpVxcXFSk1NDfcSEWZcEwPjdenShYABWqGBAwfqvffe02effabrr79e69evl8fj0datWwkYSOJMDAyVlJR00ffE9+/ffwVXAwAIB66JgZHy8/ND7p85c0YffPCBCgsL9eijj4ZnUQCa1fvvv6/IyEj16dNHkvT2229ryZIlSk5O1qxZsxQVFRXmFSLcOBODVuWll17Szp07tWTJknAvBcC3dPPNN+vxxx/Xvffeq/379ys5OVn33HOPduzYoTvuuINv5wYRg9Zl//79uvHGG1VTUxPupQD4lizL0vvvv6/rr79ezzzzjDZu3Kh33nlH7733nu677z5VVFSEe4kIMy7sRavym9/8RnFxceFeBoBmYNu2zp07J0nasGGDbr/9dkmS3+/XsWPHwrk0tBBcEwMj3XTTTSEX9tq2rUAgoKNHj+rll18O48oANJf+/fvrV7/6lTIzM1VcXKyFCxdK+uLXjng8njCvDi0BEQMj3X333SH3r7rqKnXt2lUZGRn6zne+E55FAWhW8+fP1wMPPKA1a9boySefVI8ePSR9ccZ14MCBYV4dWgKuiYFxzp49qxUrVigrK0terzfcywFwhX3++eeKiIhQZGRkuJeCMCNiYKT27durvLxciYmJ4V4KACBMeDsJRkpLS9MHH3xAxACtTKdOnb7xL3f89NNPL/Nq0NIRMTDSxIkTNW3aNB0+fFipqanq0KFDyON9+/YN08oAfBt89wuagreTYJQHH3xQ8+fP1zXXXNPoMZfLJdu25XK51NDQcOUXBwC4oogYGCUiIkKVlZWqra296BxvMwGtS21trc6cOROyLTY2NkyrQUvB20kwypfNTaQArd/p06f12GOP6c0339Tx48cbPc4ZV/CNvTDON73oD4DZZsyYoY0bN+rll1+W2+3Wr3/9az311FPy+Xx67bXXwr08tAC8nQSjXHXVVbIs62tDhk8tAOa77rrr9NprrykjI0OxsbF6//331aNHDy1btkxvvPGG1q1bF+4lIsx4OwnGeeqpp2RZVriXAeAy+/TTT5WUlCTpi+tfvvyPk0GDBuk//uM/wrk0tBBEDIxz3333KT4+PtzLAHCZde/eXQcPHlRiYqKSk5P15ptv6l//9V/1+9///oKfUETbwzUxMArXwwCt3/79+3Xu3Dn95Cc/0YcffihJmjlzpnNtzE9/+lM9+uijYV4lWgKuiYFRrrrqKgUCAc7EAK3Yl1+l8OXrfNSoUXrhhRdUV1ennTt36vrrr1e/fv3CvEq0BEQMAKBFOf8/VmJiYvThhx+qe/fuYV4ZWhreTgIAAEYiYgAALYrL5Wp0/RvXw+FC+HQSAKBFsW1bY8aMkdvtliR9/vnneuihhxr9ote33norHMtDC0LEAABalNGjR4fc/9GPfhSmlaCl48JeAABgJK6JAQAARiJiAACAkYgYAABgJCIGAAAYiYgBgGaWkZGh/Pz8cC8DaPWIGACNfPllY191GzNmTLiX2OwID8A8fE8MgEYqKyudP69atUr/+Z//qb179zrboqOjQ+bPnDmjyMjIK7a+5mTy2oG2jjM
xABrxer3OzbIsuVwu5/7nn3+ua665Rm+++aYyMjJ09dVXa/ny5Tp+/Ljuv/9+XXvttWrfvr369OmjN954I2S/GRkZmjJlimbMmKG4uDh5vV7NmjUrZGbWrFm67rrr5Ha75fP5NGXKFOexbt266Ze//KVyc3PVsWNH+Xw+vfjiiyHPP3TokO666y517NhRsbGxGjlypI4cORKy/xtvvFH/8z//o+7du8vtdmv06NEqLi7W888/75xtOnjwoCTpb3/7m26//XZ17NhRHo9HeXl5OnbsmLO/06dP68c//rE6duyohIQEzZ07t5n+VwDwdYgYAJfkscce05QpU1ReXq6srCx9/vnnSk1N1R/+8Aft3r1b48ePV15enrZt2xbyvFdffVUdOnTQtm3bNGfOHP3iF79QUVGRJOk3v/mN5s2bp0WLFmnfvn1as2aN+vTpE/L8Z599Vn379tX777+vmTNn6qc//anzfNu2dffdd+vTTz9VcXGxioqK9PHHH2vUqFEh+/joo4/05ptv6re//a3Kysr0wgsvKD09XePGjVNlZaUqKyvl9/tVWVmpIUOG6MYbb9TOnTtVWFioI0eOaOTIkc6+Hn30Ub377rtavXq11q9fr02bNqm0tPRy/CMHcD4bAC5iyZIltmVZzv0DBw7Ykuz58+d/7XNvv/12e9q0ac79IUOG2IMGDQqZufnmm+3HHnvMtm3bnjt3rn3DDTfY9fX1F9xfYmKiPXz48JBto0aNskeMGGHbtm2vX7/ejoiIsA8dOuQ8vmfPHluSvX37dtu2bfvnP/+5HRkZaVdVVYXsZ8iQIfYjjzwSsu1nP/uZPWzYsJBtFRUVtiR779699smTJ+2oqCh75cqVzuPHjx+3o6OjG+0LQPPjTAyAS9K/f/+Q+w0NDfqv//ov9e3bV507d1bHjh21fv16HTp0KGSub9++IfcTEhJUVVUlSfrhD3+o2tpade/eXePGjdPq1at19uzZkPn09PRG98vLyyVJ5eXl8vv98vv9zuPJycm65pprnBlJSkxMVNeuXb/2GEtLS/Xuu++qY8eOzu073/mOJOnjjz/Wxx9/rPr6+pA1xcXFqVevXl+7bwDfHhED4JKc/xuF586dq3nz5mnGjBnauHGjysrKlJWVpfr6+pC58y+idblcOnfunCTJ7/dr7969eumllxQdHa2JEydq8ODBOnPmzEXX4nK5JH3xdtKXf/5n528/f+1f5dy5c7rzzjtVVlYWctu3b58GDx4sm189B4QVEQOgWfzlL3/RXXfdpR/96Efq16+funfvrn379jV5P9HR0crJydELL7ygTZs2aevWrdq1a5fzeElJSch8SUmJc3YkOTlZhw4dUkVFhfP43/72NwWDQfXu3fuiPzcqKkoNDQ0h2773ve9pz5496tatm3r06BFy69Chg3r06KHIyMiQNVVXV+sf//hHk48bQNMRMQCaRY8ePVRUVKQtW7aovLxcEyZMUCAQaNI+li5dqldeeUW7d+/W/v37tWzZMkVHRysxMdGZee+99zRnzhz94x//0EsvvaT//d//1SOPPCJJyszMVN++ffXAAw/o/fff1/bt2/XjH/9YQ4YMafT21/m6deumbdu26eDBgzp27JjOnTunSZMm6dNPP9X999+v7du3a//+/Vq/fr0efPBBNTQ0qGPHjho7dqweffRR/elPf9Lu3bs1ZswYXXUV/2oFrgReaQCaxc9+9jN973vfU1ZWljIyMuT1enX33Xc3aR/XXHONFi9erO9///vq27ev/vSnP+n3v/+9Onfu7MxMmzZNpaWluummm/TLX/5Sc+fOVVZWlqQv3lZas2aNOnXqpMGDByszM1Pdu3fXqlWrvvZnT58+XREREUpOTlbXrl116NAh+Xw+vffee2poaFBWVpZSUlL0yCOPyLIsJ1SeffZZDR48WDk5OcrMzNSgQYOUmprapOMGcGlcNm/qAjBEt27dlJ+fzzfrApDEmRgAAGAoIgYAABi
Jt5MAAICROBMDAACMRMQAAAAjETEAAMBIRAwAADASEQMAAIxExAAAACMRMQAAwEhEDAAAMNL/A5Tv3tTTdt7YAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "spaceship['Transported'].value_counts().plot(kind='bar')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckTransportedHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueCabin_BCabin_CCabin_DCabin_ECabin_FCabin_GCabin_TDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_True
039.00.00.00.00.00.0FalseTrueFalseFalseTrueFalseFalseFalseFalseFalseFalseFalseTrueFalse
124.0109.09.025.0549.044.0TrueFalseFalseFalseFalseFalseFalseFalseTrueFalseFalseFalseTrueFalse
258.043.03576.00.06715.049.0FalseTrueFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueTrue
333.00.01283.0371.03329.0193.0FalseTrueFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
416.0303.070.0151.0565.02.0TrueFalseFalseFalseFalseFalseFalseFalseTrueFalseFalseFalseTrueFalse
\n", + "
" + ], + "text/plain": [ + " Age RoomService ... Destination_TRAPPIST-1e VIP_True\n", + "0 39.0 0.0 ... True False\n", + "1 24.0 109.0 ... True False\n", + "2 58.0 43.0 ... True True\n", + "3 33.0 0.0 ... True False\n", + "4 16.0 303.0 ... True False\n", + "\n", + "[5 rows x 20 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spaceship = spaceship.drop(columns=['PassengerId', 'Name'])\n", + "cat_columns = spaceship.select_dtypes(include=object).columns\n", + "\n", + "spaceship = pd.get_dummies(spaceship, columns=cat_columns, drop_first=True)\n", + "spaceship.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "scaler = StandardScaler()\n", + "\n", + "X = spaceship.drop(columns='Transported')\n", + "y = spaceship['Transported']" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)" ] }, { @@ -237,11 +506,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "# Random Forest\n", + "rf = RandomForestClassifier(\n", + " n_estimators=100,\n", + " random_state=0)\n", + "\n", + "rf.fit(X_train_scaled, y_train)\n", + "\n", + "y_pred = rf.predict(X_test_scaled)" ] }, { @@ -253,11 +529,33 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy (baseline): 0.7904689863842662\n", + "[[531 130]\n", + " [147 514]]\n", + " precision recall f1-score support\n", + "\n", + " False 0.78 0.80 0.79 661\n", + " True 0.80 0.78 0.79 661\n", + "\n", + 
" accuracy 0.79 1322\n", + " macro avg 0.79 0.79 0.79 1322\n", + "weighted avg 0.79 0.79 0.79 1322\n", + "\n" + ] + } + ], "source": [ - "#your code here" + "acc = accuracy_score(y_test, y_pred)\n", + "print(\"Accuracy (baseline):\", acc)\n", + "print(confusion_matrix(y_test, y_pred))\n", + "print(classification_report(y_test, y_pred))" ] }, { @@ -283,11 +581,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "grid = {\n", + " \"n_estimators\": [100, 200, 500],\n", + " \"max_depth\": [None, 10, 30],\n", + " \"min_samples_split\": [2, 5, 10],\n", + " \"min_samples_leaf\": [1, 2, 4]}" ] }, { @@ -299,10 +601,873 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=0), n_jobs=-1,\n",
+       "             param_grid={'max_depth': [None, 10, 30],\n",
+       "                         'min_samples_leaf': [1, 2, 4],\n",
+       "                         'min_samples_split': [2, 5, 10],\n",
+       "                         'n_estimators': [100, 200, 500]},\n",
+       "             scoring='accuracy')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=0), n_jobs=-1,\n", + " param_grid={'max_depth': [None, 10, 30],\n", + " 'min_samples_leaf': [1, 2, 4],\n", + " 'min_samples_split': [2, 5, 10],\n", + " 'n_estimators': [100, 200, 500]},\n", + " scoring='accuracy')" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = GridSearchCV(\n", + " estimator=rf,\n", + " param_grid=grid,\n", + " cv=5,\n", + " scoring=\"accuracy\",\n", + " n_jobs=-1)\n", + "\n", + "model.fit(X_train_scaled, y_train)" + ] }, { "cell_type": "markdown", @@ -313,15 +1478,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.7829046898638427\n", + "[[511 150]\n", + " [137 524]]\n", + " precision recall f1-score support\n", + "\n", + " False 0.79 0.77 0.78 661\n", + " True 0.78 0.79 0.79 661\n", + "\n", + " accuracy 0.78 1322\n", + " macro avg 0.78 0.78 0.78 1322\n", + "weighted avg 0.78 0.78 0.78 1322\n", + "\n" + ] + } + ], + "source": [ + "best_rf = model.best_estimator_\n", + "\n", + "y_pred = best_rf.predict(X_test_scaled)\n", + "\n", + "print(\"Accuracy:\", accuracy_score(y_test, y_pred))\n", + "print(confusion_matrix(y_test, y_pred))\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "En este lab se ha aplicado hyperparameter tuning sobre el modelo Random Forest, que había sido el que mejor rendimiento mostró en el lab anterior.\n", + "Tras realizar Grid Search con validación cruzada, el rendimiento obtenido es similar, e incluso ligeramente inferior, al del modelo baseline.\n", + "\n", + "En este caso, los resultados indican que el Random Forest con parámetros por defecto ya ofrecía un buen equilibrio, y que el tuning no aporta una mejora 
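significativa en la capacidad de generalización del modelo. En concreto, la accuracy en test pasa de 0.790 (baseline) a 0.783 (modelo ajustado), es decir, 10 aciertos menos sobre 1322 muestras, una diferencia tan pequeña que puede deberse a la variabilidad de la partición de test." + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -335,7 +1537,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.13.9" } }, "nbformat": 4,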