From 05b09d17c9954e56a766bb15791073df416a7b1d Mon Sep 17 00:00:00 2001 From: ghv29 Date: Thu, 26 Feb 2026 15:49:14 +0100 Subject: [PATCH] Lab Solved --- lab-hyper-tuning.ipynb | 3544 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 3510 insertions(+), 34 deletions(-) diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..c4f4a1d 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -35,14 +35,23 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "#Libraries\n", "import pandas as pd\n", "import numpy as np\n", - "from sklearn.model_selection import train_test_split" + "from sklearn.model_selection import train_test_split\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.ensemble import BaggingRegressor, RandomForestRegressor,AdaBoostRegressor, GradientBoostingRegressor\n", + "\n", + "# New in here:\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", + "from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error" ] }, { @@ -221,94 +230,3561 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "spaceship = spaceship.dropna()" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 4, "metadata": {}, + "outputs": [], "source": [ - "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters." + "spaceship[\"Cabin\"] = spaceship[\"Cabin\"].str.split('/').str[0]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "spaceship = spaceship.drop(columns= [\"PassengerId\", \"Name\"])" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 6, "metadata": {}, + "outputs": [], "source": [ - "- Evaluate your model" + "spaceship = pd.get_dummies(spaceship, columns = ['HomePlanet','CryoSleep', 'Cabin', 'Destination', 'VIP'], drop_first = True, dtype=int)" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "spaceship[\"Transported\"]= spaceship[\"Transported\"].astype(int)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 9, "metadata": {}, + "outputs": [], "source": [ - "**Grid/Random Search**" + "features = spaceship.drop(columns=[\"Transported\"])\n", + "target = spaceship[\"Transported\"]" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 10, "metadata": {}, + "outputs": [], "source": [ - "For this lab we will use Grid Search." + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.20, random_state=0)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 12, "metadata": {}, + "outputs": [], "source": [ - "- Define hyperparameters to fine tune." + "normalizer = MinMaxScaler() " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
MinMaxScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "MinMaxScaler()" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code here" + "normalizer.fit(X_train)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 15, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueCabin_BCabin_CCabin_DCabin_ECabin_FCabin_GCabin_TDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_True
00.4050630.00.0000000.00.0000000.00.01.01.00.00.00.01.00.00.00.00.01.00.0
10.0506330.00.0000000.00.0000000.00.00.01.00.00.00.00.00.01.00.00.01.00.0
20.3797470.00.0079160.00.0512760.00.00.00.00.00.00.00.01.00.00.00.01.00.0
\n", + "
" + ], + "text/plain": [ + " Age RoomService FoodCourt ShoppingMall Spa VRDeck \\\n", + "0 0.405063 0.0 0.000000 0.0 0.000000 0.0 \n", + "1 0.050633 0.0 0.000000 0.0 0.000000 0.0 \n", + "2 0.379747 0.0 0.007916 0.0 0.051276 0.0 \n", + "\n", + " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n", + "0 0.0 1.0 1.0 0.0 0.0 \n", + "1 0.0 0.0 1.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n", + "0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "2 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "\n", + " Destination_TRAPPIST-1e VIP_True \n", + "0 1.0 0.0 \n", + "1 1.0 0.0 \n", + "2 1.0 0.0 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "- Run Grid Search" + "X_train_norm = normalizer.transform(X_train)\n", + "X_train_norm = pd.DataFrame(X_train_norm, columns = X_train.columns)\n", + "X_train_norm.head(3)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", + "execution_count": 16, "metadata": {}, - "source": [ - "- Evaluate your model" + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueCabin_BCabin_CCabin_DCabin_ECabin_FCabin_GCabin_TDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_True
00.6329110.00.00.00.00.00.01.01.00.00.00.00.01.00.00.00.00.00.0
10.2278480.00.00.00.00.00.00.01.00.00.00.00.00.01.00.01.00.00.0
20.1898730.00.00.00.00.00.00.01.00.00.00.00.00.01.00.00.00.00.0
\n", + "
" + ], + "text/plain": [ + " Age RoomService FoodCourt ShoppingMall Spa VRDeck \\\n", + "0 0.632911 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.227848 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.189873 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n", + "0 0.0 1.0 1.0 0.0 0.0 \n", + "1 0.0 0.0 1.0 0.0 0.0 \n", + "2 0.0 0.0 1.0 0.0 0.0 \n", + "\n", + " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n", + "0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 1.0 0.0 1.0 \n", + "2 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "\n", + " Destination_TRAPPIST-1e VIP_True \n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test_norm = normalizer.transform(X_test)\n", + "X_test_norm = pd.DataFrame(X_test_norm, columns = X_test.columns)\n", + "X_test_norm.head(3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "baggin_reg = BaggingRegressor(DecisionTreeRegressor(max_depth= 20),\n", + " n_estimators= 100,\n", + " max_samples= 2000,\n", + " bootstrap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
BaggingRegressor(estimator=DecisionTreeRegressor(max_depth=20),\n",
+       "                 max_samples=2000, n_estimators=100)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "BaggingRegressor(estimator=DecisionTreeRegressor(max_depth=20),\n", + " max_samples=2000, n_estimators=100)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "baggin_reg.fit (X_train_norm, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Evaluate your model" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "pred = baggin_reg.predict(X_test_norm)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bagging Model:\n", + "R2 score: 0.42077745980054726\n", + "RMSE: 0.3805333560279088\n", + "MAE: 0.2736970672264073\n" + ] + } + ], + "source": [ + "print(f\"Bagging Model:\")\n", + "print(f\"R2 score: \", r2_score (y_test, pred))\n", + "print(f\"RMSE: \", root_mean_squared_error (y_test, pred))\n", + "print(f\"MAE: \", mean_absolute_error (y_test, pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Grid/Random Search**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this lab we will use Grid Search." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Define hyperparameters to fine tune." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "gb_reg = GradientBoostingRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "grid ={\"n_estimators\" : [100,300],\n", + " \"max_depth\": [3,5]}" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "model = GridSearchCV(estimator= gb_reg, param_grid= grid, cv =3 , n_jobs = -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=3, estimator=GradientBoostingRegressor(), n_jobs=-1,\n",
+       "             param_grid={'max_depth': [3, 5], 'n_estimators': [100, 300]})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=3, estimator=GradientBoostingRegressor(), n_jobs=-1,\n", + " param_grid={'max_depth': [3, 5], 'n_estimators': [100, 300]})" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(X_train_norm, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Run Grid Search" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_depth': 3, 'n_estimators': 100}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GradientBoostingRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GradientBoostingRegressor()" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "best_model = model.best_estimator_\n", + "best_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Evaluate your model" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "pred = best_model.predict(X_test_norm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 Score : 0.4293244345530748\n", + "RMSE: 0.3777153575931634\n", + "MAE: 0.287630188993588\n" + ] + } + ], + "source": [ + "print(f\"R2 Score :\", r2_score ( y_test, pred))\n", + "print(f\"RMSE: \", root_mean_squared_error (y_test, pred))\n", + "print(f\"MAE: \", mean_absolute_error (y_test, pred))\n" ] }, { @@ -321,7 +3797,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -335,7 +3811,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.13.9" } }, "nbformat": 4,