From 38bcf130d3d87c528e9c536b3481961f1e5c4d4a Mon Sep 17 00:00:00 2001 From: andressa Date: Thu, 26 Feb 2026 15:55:57 +0000 Subject: [PATCH] lab-hyperparameter --- lab-hyper-tuning.ipynb | 5932 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 5871 insertions(+), 61 deletions(-) diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..8a9c1ae 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -35,19 +35,31 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ - "#Libraries\n", + "from sklearn.datasets import fetch_california_housing\n", "import pandas as pd\n", "import numpy as np\n", - "from sklearn.model_selection import train_test_split" + "\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "# New in here:\n", + "from sklearn.ensemble import BaggingRegressor, RandomForestRegressor,AdaBoostRegressor, GradientBoostingRegressor\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", + "from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -200,7 +212,7 @@ "4 True " ] }, - "execution_count": 2, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -210,99 +222,5895 @@ "spaceship.head()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now perform the same as before:\n", - "- Feature Scaling\n", - "- Feature Selection\n" - ] - }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", + 
"execution_count": 16, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdHomePlanetCryoSleepCabinDestinationAgeVIPRoomServiceFoodCourtShoppingMallSpaVRDeckNameTransported
00001_01EuropaFalseB/0/PTRAPPIST-1e39.0False0.00.00.00.00.0Maham OfracculyFalse
10002_01EarthFalseF/0/STRAPPIST-1e24.0False109.09.025.0549.044.0Juanna VinesTrue
20003_01EuropaFalseA/0/STRAPPIST-1e58.0True43.03576.00.06715.049.0Altark SusentFalse
30003_02EuropaFalseA/0/STRAPPIST-1e33.0False0.01283.0371.03329.0193.0Solam SusentFalse
40004_01EarthFalseF/1/STRAPPIST-1e16.0False303.070.0151.0565.02.0Willy SantantinesTrue
.............................................
86889276_01EuropaFalseA/98/P55 Cancri e41.0True0.06819.00.01643.074.0Gravior NoxnutherFalse
86899278_01EarthTrueG/1499/SPSO J318.5-2218.0False0.00.00.00.00.0Kurta MondalleyFalse
86909279_01EarthFalseG/1500/STRAPPIST-1e26.0False0.00.01872.01.00.0Fayey ConnonTrue
86919280_01EuropaFalseE/608/S55 Cancri e32.0False0.01049.00.0353.03235.0Celeon HontichreFalse
86929280_02EuropaFalseE/608/STRAPPIST-1e44.0False126.04688.00.00.012.0Propsh HontichreTrue
\n", + "

6606 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId HomePlanet CryoSleep Cabin Destination Age VIP \\\n", + "0 0001_01 Europa False B/0/P TRAPPIST-1e 39.0 False \n", + "1 0002_01 Earth False F/0/S TRAPPIST-1e 24.0 False \n", + "2 0003_01 Europa False A/0/S TRAPPIST-1e 58.0 True \n", + "3 0003_02 Europa False A/0/S TRAPPIST-1e 33.0 False \n", + "4 0004_01 Earth False F/1/S TRAPPIST-1e 16.0 False \n", + "... ... ... ... ... ... ... ... \n", + "8688 9276_01 Europa False A/98/P 55 Cancri e 41.0 True \n", + "8689 9278_01 Earth True G/1499/S PSO J318.5-22 18.0 False \n", + "8690 9279_01 Earth False G/1500/S TRAPPIST-1e 26.0 False \n", + "8691 9280_01 Europa False E/608/S 55 Cancri e 32.0 False \n", + "8692 9280_02 Europa False E/608/S TRAPPIST-1e 44.0 False \n", + "\n", + " RoomService FoodCourt ShoppingMall Spa VRDeck Name \\\n", + "0 0.0 0.0 0.0 0.0 0.0 Maham Ofracculy \n", + "1 109.0 9.0 25.0 549.0 44.0 Juanna Vines \n", + "2 43.0 3576.0 0.0 6715.0 49.0 Altark Susent \n", + "3 0.0 1283.0 371.0 3329.0 193.0 Solam Susent \n", + "4 303.0 70.0 151.0 565.0 2.0 Willy Santantines \n", + "... ... ... ... ... ... ... \n", + "8688 0.0 6819.0 0.0 1643.0 74.0 Gravior Noxnuther \n", + "8689 0.0 0.0 0.0 0.0 0.0 Kurta Mondalley \n", + "8690 0.0 0.0 1872.0 1.0 0.0 Fayey Connon \n", + "8691 0.0 1049.0 0.0 353.0 3235.0 Celeon Hontichre \n", + "8692 126.0 4688.0 0.0 0.0 12.0 Propsh Hontichre \n", + "\n", + " Transported \n", + "0 False \n", + "1 True \n", + "2 False \n", + "3 False \n", + "4 True \n", + "... ... \n", + "8688 False \n", + "8689 False \n", + "8690 True \n", + "8691 False \n", + "8692 True \n", + "\n", + "[6606 rows x 14 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters." 
+ "df_spaceship = spaceship.dropna()\n", + "df_spaceship" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\leolo\\AppData\\Local\\Temp\\ipykernel_30612\\3941257211.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df_spaceship['Cabin'] = df_spaceship['Cabin'].map(lambda x: x[0])\n" + ] + } + ], "source": [ - "#your code here" + "has_null_cabin = df_spaceship['Cabin'].isnull().any()\n", + "df_spaceship['Cabin'] = df_spaceship['Cabin'].map(lambda x: x[0])" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 18, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['B' 'F' 'A' 'G' 'E' 'C' 'D' 'T']\n" + ] + } + ], "source": [ - "- Evaluate your model" + "print(df_spaceship['Cabin'].unique())" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Grid/Random Search**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For this lab we will use Grid Search." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Define hyperparameters to fine tune." 
+ "df_cleaned = df_spaceship.drop(columns=['PassengerId', 'Name'])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP',\n", + " 'Transported'],\n", + " dtype='object')" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code here" + "non_numerical_columns = df_cleaned.select_dtypes(exclude=['number']).columns\n", + "non_numerical_columns" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 21, "metadata": {}, - "source": [ + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueCabin_BCabin_CCabin_DCabin_ECabin_FCabin_GCabin_TDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_TrueTransported_True
039.00.00.00.00.00.010010000000100
124.0109.09.025.0549.044.000000001000101
258.043.03576.00.06715.049.010000000000110
333.00.01283.0371.03329.0193.010000000000100
416.0303.070.0151.0565.02.000000001000101
...............................................................
868841.00.06819.00.01643.074.010000000000010
868918.00.00.00.00.00.000100000101000
869026.00.00.01872.01.00.000000000100101
869132.00.01049.00.0353.03235.010000010000000
869244.0126.04688.00.00.012.010000010000101
\n", + "

6606 rows × 20 columns

\n", + "
" + ], + "text/plain": [ + " Age RoomService FoodCourt ShoppingMall Spa VRDeck \\\n", + "0 39.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 24.0 109.0 9.0 25.0 549.0 44.0 \n", + "2 58.0 43.0 3576.0 0.0 6715.0 49.0 \n", + "3 33.0 0.0 1283.0 371.0 3329.0 193.0 \n", + "4 16.0 303.0 70.0 151.0 565.0 2.0 \n", + "... ... ... ... ... ... ... \n", + "8688 41.0 0.0 6819.0 0.0 1643.0 74.0 \n", + "8689 18.0 0.0 0.0 0.0 0.0 0.0 \n", + "8690 26.0 0.0 0.0 1872.0 1.0 0.0 \n", + "8691 32.0 0.0 1049.0 0.0 353.0 3235.0 \n", + "8692 44.0 126.0 4688.0 0.0 0.0 12.0 \n", + "\n", + " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n", + "0 1 0 0 1 0 \n", + "1 0 0 0 0 0 \n", + "2 1 0 0 0 0 \n", + "3 1 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "8688 1 0 0 0 0 \n", + "8689 0 0 1 0 0 \n", + "8690 0 0 0 0 0 \n", + "8691 1 0 0 0 0 \n", + "8692 1 0 0 0 0 \n", + "\n", + " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n", + "0 0 0 0 0 0 0 \n", + "1 0 0 1 0 0 0 \n", + "2 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 \n", + "4 0 0 1 0 0 0 \n", + "... ... ... ... ... ... ... \n", + "8688 0 0 0 0 0 0 \n", + "8689 0 0 0 1 0 1 \n", + "8690 0 0 0 1 0 0 \n", + "8691 0 1 0 0 0 0 \n", + "8692 0 1 0 0 0 0 \n", + "\n", + " Destination_TRAPPIST-1e VIP_True Transported_True \n", + "0 1 0 0 \n", + "1 1 0 1 \n", + "2 1 1 0 \n", + "3 1 0 0 \n", + "4 1 0 1 \n", + "... ... ... ... 
\n", + "8688 0 1 0 \n", + "8689 0 0 0 \n", + "8690 1 0 1 \n", + "8691 0 0 0 \n", + "8692 1 0 1 \n", + "\n", + "[6606 rows x 20 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_numerical_columns = pd.get_dummies(df_cleaned, columns=non_numerical_columns, drop_first=True, dtype=int)\n", + "non_numerical_columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now perform the same as before:\n", + "- Feature Scaling\n", + "- Feature Selection\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueCabin_BCabin_CCabin_DCabin_ECabin_FCabin_GCabin_TDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_True
039.00.00.00.00.00.01001000000010
124.0109.09.025.0549.044.00000000100010
258.043.03576.00.06715.049.01000000000011
333.00.01283.0371.03329.0193.01000000000010
416.0303.070.0151.0565.02.00000000100010
............................................................
868841.00.06819.00.01643.074.01000000000001
868918.00.00.00.00.00.00010000010100
869026.00.00.01872.01.00.00000000010010
869132.00.01049.00.0353.03235.01000001000000
869244.0126.04688.00.00.012.01000001000010
\n", + "

6606 rows × 19 columns

\n", + "
" + ], + "text/plain": [ + " Age RoomService FoodCourt ShoppingMall Spa VRDeck \\\n", + "0 39.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 24.0 109.0 9.0 25.0 549.0 44.0 \n", + "2 58.0 43.0 3576.0 0.0 6715.0 49.0 \n", + "3 33.0 0.0 1283.0 371.0 3329.0 193.0 \n", + "4 16.0 303.0 70.0 151.0 565.0 2.0 \n", + "... ... ... ... ... ... ... \n", + "8688 41.0 0.0 6819.0 0.0 1643.0 74.0 \n", + "8689 18.0 0.0 0.0 0.0 0.0 0.0 \n", + "8690 26.0 0.0 0.0 1872.0 1.0 0.0 \n", + "8691 32.0 0.0 1049.0 0.0 353.0 3235.0 \n", + "8692 44.0 126.0 4688.0 0.0 0.0 12.0 \n", + "\n", + " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n", + "0 1 0 0 1 0 \n", + "1 0 0 0 0 0 \n", + "2 1 0 0 0 0 \n", + "3 1 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "8688 1 0 0 0 0 \n", + "8689 0 0 1 0 0 \n", + "8690 0 0 0 0 0 \n", + "8691 1 0 0 0 0 \n", + "8692 1 0 0 0 0 \n", + "\n", + " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n", + "0 0 0 0 0 0 0 \n", + "1 0 0 1 0 0 0 \n", + "2 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 \n", + "4 0 0 1 0 0 0 \n", + "... ... ... ... ... ... ... \n", + "8688 0 0 0 0 0 0 \n", + "8689 0 0 0 1 0 1 \n", + "8690 0 0 0 1 0 0 \n", + "8691 0 1 0 0 0 0 \n", + "8692 0 1 0 0 0 0 \n", + "\n", + " Destination_TRAPPIST-1e VIP_True \n", + "0 1 0 \n", + "1 1 0 \n", + "2 1 1 \n", + "3 1 0 \n", + "4 1 0 \n", + "... ... ... 
\n", + "8688 0 1 \n", + "8689 0 0 \n", + "8690 1 0 \n", + "8691 0 0 \n", + "8692 1 0 \n", + "\n", + "[6606 rows x 19 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "features = non_numerical_columns.drop(columns=['Transported_True'])\n", + "features" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 0\n", + "3 0\n", + "4 1\n", + " ..\n", + "8688 0\n", + "8689 0\n", + "8690 1\n", + "8691 0\n", + "8692 1\n", + "Name: Transported_True, Length: 6606, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target = non_numerical_columns[\"Transported_True\"]\n", + "target" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueCabin_BCabin_CCabin_DCabin_ECabin_FCabin_GCabin_TDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_True
343232.00.00.00.00.00.00110001000010
73124.00.00.00.00.00.00010000010010
204230.00.0236.00.01149.00.00000000100010
499917.013.00.0565.0367.01.00100001000010
575526.00.00.00.00.00.00010000010010
\n", + "
" + ], + "text/plain": [ + " Age RoomService FoodCourt ShoppingMall Spa VRDeck \\\n", + "3432 32.0 0.0 0.0 0.0 0.0 0.0 \n", + "7312 4.0 0.0 0.0 0.0 0.0 0.0 \n", + "2042 30.0 0.0 236.0 0.0 1149.0 0.0 \n", + "4999 17.0 13.0 0.0 565.0 367.0 1.0 \n", + "5755 26.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n", + "3432 0 1 1 0 0 \n", + "7312 0 0 1 0 0 \n", + "2042 0 0 0 0 0 \n", + "4999 0 1 0 0 0 \n", + "5755 0 0 1 0 0 \n", + "\n", + " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n", + "3432 0 1 0 0 0 0 \n", + "7312 0 0 0 1 0 0 \n", + "2042 0 0 1 0 0 0 \n", + "4999 0 1 0 0 0 0 \n", + "5755 0 0 0 1 0 0 \n", + "\n", + " Destination_TRAPPIST-1e VIP_True \n", + "3432 1 0 \n", + "7312 1 0 \n", + "2042 1 0 \n", + "4999 1 0 \n", + "5755 1 0 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size= 0.20 , random_state =0)\n", + "X_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "normalizer = MinMaxScaler()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
MinMaxScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "MinMaxScaler()" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normalizer.fit(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_norm = normalizer.transform(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[4.05063291e-01, 0.00000000e+00, 0.00000000e+00, ...,\n", + " 0.00000000e+00, 1.00000000e+00, 0.00000000e+00],\n", + " [5.06329114e-02, 0.00000000e+00, 0.00000000e+00, ...,\n", + " 0.00000000e+00, 1.00000000e+00, 0.00000000e+00],\n", + " [3.79746835e-01, 0.00000000e+00, 7.91600979e-03, ...,\n", + " 0.00000000e+00, 1.00000000e+00, 0.00000000e+00],\n", + " ...,\n", + " [4.55696203e-01, 0.00000000e+00, 1.59527723e-01, ...,\n", + " 0.00000000e+00, 1.00000000e+00, 0.00000000e+00],\n", + " [4.30379747e-01, 0.00000000e+00, 1.34169658e-04, ...,\n", + " 0.00000000e+00, 1.00000000e+00, 1.00000000e+00],\n", + " [1.77215190e-01, 2.01612903e-04, 2.95508671e-02, ...,\n", + " 0.00000000e+00, 1.00000000e+00, 0.00000000e+00]], shape=(5284, 19))" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_norm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueCabin_BCabin_CCabin_DCabin_ECabin_FCabin_GCabin_TDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_True
00.4050630.00.0000000.00.0000000.00.01.01.00.00.00.01.00.00.00.00.01.00.0
10.0506330.00.0000000.00.0000000.00.00.01.00.00.00.00.00.01.00.00.01.00.0
20.3797470.00.0079160.00.0512760.00.00.00.00.00.00.00.01.00.00.00.01.00.0
\n", + "
" + ], + "text/plain": [ + " Age RoomService FoodCourt ShoppingMall Spa VRDeck \\\n", + "0 0.405063 0.0 0.000000 0.0 0.000000 0.0 \n", + "1 0.050633 0.0 0.000000 0.0 0.000000 0.0 \n", + "2 0.379747 0.0 0.007916 0.0 0.051276 0.0 \n", + "\n", + " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n", + "0 0.0 1.0 1.0 0.0 0.0 \n", + "1 0.0 0.0 1.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n", + "0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "2 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "\n", + " Destination_TRAPPIST-1e VIP_True \n", + "0 1.0 0.0 \n", + "1 1.0 0.0 \n", + "2 1.0 0.0 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_norm = normalizer.transform(X_train)\n", + "X_train_norm = pd.DataFrame(X_train_norm, columns = X_train.columns)\n", + "X_train_norm.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueCabin_BCabin_CCabin_DCabin_ECabin_FCabin_GCabin_TDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_True
00.6329110.00.0000000.00.000000.00.01.01.00.00.00.00.01.00.00.00.00.00.0
10.2278480.00.0000000.00.000000.00.00.01.00.00.00.00.00.01.00.01.00.00.0
20.1898730.00.0000000.00.000000.00.00.01.00.00.00.00.00.01.00.00.00.00.0
30.6582280.00.0000000.00.000000.00.00.01.00.00.00.00.00.01.00.01.00.00.0
40.7848100.00.0547750.00.077740.01.00.00.01.00.00.00.00.00.00.00.01.01.0
\n", + "
" + ], + "text/plain": [ + " Age RoomService FoodCourt ShoppingMall Spa VRDeck \\\n", + "0 0.632911 0.0 0.000000 0.0 0.00000 0.0 \n", + "1 0.227848 0.0 0.000000 0.0 0.00000 0.0 \n", + "2 0.189873 0.0 0.000000 0.0 0.00000 0.0 \n", + "3 0.658228 0.0 0.000000 0.0 0.00000 0.0 \n", + "4 0.784810 0.0 0.054775 0.0 0.07774 0.0 \n", + "\n", + " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n", + "0 0.0 1.0 1.0 0.0 0.0 \n", + "1 0.0 0.0 1.0 0.0 0.0 \n", + "2 0.0 0.0 1.0 0.0 0.0 \n", + "3 0.0 0.0 1.0 0.0 0.0 \n", + "4 1.0 0.0 0.0 1.0 0.0 \n", + "\n", + " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n", + "0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 1.0 0.0 1.0 \n", + "2 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 1.0 0.0 1.0 \n", + "4 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " Destination_TRAPPIST-1e VIP_True \n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 1.0 1.0 " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test_norm = normalizer.transform(X_test)\n", + "X_test_norm = pd.DataFrame(X_test_norm, columns = X_test.columns)\n", + "\n", + "X_test_norm.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
BaggingRegressor(estimator=DecisionTreeRegressor(max_depth=20),\n",
+       "                 max_samples=2000, n_estimators=100)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "BaggingRegressor(estimator=DecisionTreeRegressor(max_depth=20),\n", + " max_samples=2000, n_estimators=100)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bagging_reg = BaggingRegressor(DecisionTreeRegressor(max_depth = 20),\n", + " n_estimators = 100,\n", + " max_samples = 2000,\n", + " bootstrap = True)\n", + "\n", + "bagging_reg.fit (X_train_norm, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\leolo\\anaconda3\\Lib\\site-packages\\sklearn\\utils\\validation.py:2742: UserWarning: X has feature names, but BaggingRegressor was fitted without feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "pred = bagging_reg.predict(X_test_norm)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score: 0.41763099015480976\n", + "RMSE: 0.3815655283975448\n", + "MAE: 0.2752686405898975\n" + ] + } + ], + "source": [ + "print(f\"R2 score: \", r2_score (y_test, pred))\n", + "print(f\"RMSE: \", root_mean_squared_error (y_test, pred))\n", + "print(f\"MAE: \", mean_absolute_error (y_test, pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Evaluate your model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Bagging/Pasting achieved the best overall performance, with the highest R² score and the lowest RMSE among the tested models." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Grid/Random Search**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this lab we will use Grid Search." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Define hyperparameters to fine-tune." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "gb_reg = GradientBoostingRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "grid = {\"n_estimators\": [50, 100, 150, 200],\n", + " \"max_leaf_nodes\": [50, 150, 300],\n", + "\t\t\"max_depth\":[5, 15, 30]}" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "model = GridSearchCV(estimator = gb_reg, param_grid = grid, cv = 5, n_jobs = -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=5, estimator=GradientBoostingRegressor(), n_jobs=-1,\n",
+       "             param_grid={'max_depth': [5, 15, 30],\n",
+       "                         'max_leaf_nodes': [50, 150, 300],\n",
+       "                         'n_estimators': [50, 100, 150, 200]})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=5, estimator=GradientBoostingRegressor(), n_jobs=-1,\n", + " param_grid={'max_depth': [5, 15, 30],\n", + " 'max_leaf_nodes': [50, 150, 300],\n", + " 'n_estimators': [50, 100, 150, 200]})" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(X_train_norm, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "- Run Grid Search" ] }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_depth': 5, 'max_leaf_nodes': 50, 'n_estimators': 50}" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "best_model = model.best_estimator_" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GradientBoostingRegressor(max_depth=5, max_leaf_nodes=50, n_estimators=50)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GradientBoostingRegressor(max_depth=5, max_leaf_nodes=50, n_estimators=50)" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "best_model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\leolo\\anaconda3\\Lib\\site-packages\\sklearn\\utils\\validation.py:2742: UserWarning: X has feature names, but GradientBoostingRegressor was fitted without feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "pred = best_model.predict(X_test_norm)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score: 0.4301522235709888\n", + "RMSE: 0.37744131213640725\n", + "MAE: 0.2827461261600591\n" + ] + } + ], + "source": [ + "print(f\"R2 score: \", r2_score (y_test, pred))\n", + "print(f\"RMSE: \", root_mean_squared_error (y_test, pred))\n", + "print(f\"MAE: \", mean_absolute_error (y_test, pred))" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "#Random" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "ada_reg = AdaBoostRegressor(DecisionTreeRegressor())" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "grid_5 = {\"n_estimators\": np.arange(5,100),\n", + " \"estimator__max_leaf_nodes\": np.arange(5,50),\n", + "\t\t\"estimator__max_depth\": np.arange(3,50)}" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "model_5 = RandomizedSearchCV(estimator = ada_reg, param_distributions = grid_5, n_iter = 20, cv = 5, n_jobs = -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/html": [ + "
RandomizedSearchCV(cv=5,\n",
+       "                   estimator=AdaBoostRegressor(estimator=DecisionTreeRegressor()),\n",
+       "                   n_iter=20, n_jobs=-1,\n",
+       "                   param_distributions={'estimator__max_depth': array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n",
+       "       20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,\n",
+       "       37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),\n",
+       "                                        'estimator__max_leaf_nodes': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13...\n",
+       "       22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,\n",
+       "       39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),\n",
+       "                                        'n_estimators': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n",
+       "       22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,\n",
+       "       39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,\n",
+       "       56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,\n",
+       "       73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,\n",
+       "       90, 91, 92, 93, 94, 95, 96, 97, 98, 99])})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "RandomizedSearchCV(cv=5,\n", + " estimator=AdaBoostRegressor(estimator=DecisionTreeRegressor()),\n", + " n_iter=20, n_jobs=-1,\n", + " param_distributions={'estimator__max_depth': array([ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n", + " 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,\n", + " 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),\n", + " 'estimator__max_leaf_nodes': array([ 5, 6, 7, 8, 9, 10, 11, 12, 13...\n", + " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,\n", + " 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),\n", + " 'n_estimators': array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,\n", + " 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,\n", + " 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,\n", + " 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,\n", + " 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,\n", + " 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])})" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_5.fit(X_train_norm,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_estimators': np.int64(5),\n", + " 'estimator__max_leaf_nodes': np.int64(37),\n", + " 'estimator__max_depth': np.int64(21)}" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_5.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "best_model_5 = model_5.best_estimator_" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"c:\\Users\\leolo\\anaconda3\\Lib\\site-packages\\sklearn\\utils\\validation.py:2742: UserWarning: X has feature names, but AdaBoostRegressor was fitted without feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "pred = best_model_5.predict(X_test_norm)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "R2 score: 0.41574994080405114\n", + "RMSE: 0.38218125908917516\n", + "MAE: 0.29453667773837383\n" + ] + } + ], + "source": [ + "print(f\"R2 score: \", r2_score (y_test, pred))\n", + "print(f\"RMSE: \", root_mean_squared_error (y_test, pred))\n", + "print(f\"MAE: \", mean_absolute_error (y_test, pred))" + ] }, { "cell_type": "markdown", @@ -316,12 +6124,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "#Grid Search achieved the best overall performance, with the highest R² score and the lowest RMSE." + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -335,7 +6145,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.13.9" } }, "nbformat": 4,