diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..888d437 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -52,6 +52,177 @@ "outputs": [ { "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "PassengerId", + "rawType": "object", + "type": "string" + }, + { + "name": "HomePlanet", + "rawType": "object", + "type": "string" + }, + { + "name": "CryoSleep", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Cabin", + "rawType": "object", + "type": "string" + }, + { + "name": "Destination", + "rawType": "object", + "type": "string" + }, + { + "name": "Age", + "rawType": "float64", + "type": "float" + }, + { + "name": "VIP", + "rawType": "object", + "type": "unknown" + }, + { + "name": "RoomService", + "rawType": "float64", + "type": "float" + }, + { + "name": "FoodCourt", + "rawType": "float64", + "type": "float" + }, + { + "name": "ShoppingMall", + "rawType": "float64", + "type": "float" + }, + { + "name": "Spa", + "rawType": "float64", + "type": "float" + }, + { + "name": "VRDeck", + "rawType": "float64", + "type": "float" + }, + { + "name": "Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Transported", + "rawType": "bool", + "type": "boolean" + } + ], + "ref": "917b5fb0-7acf-4710-9ae2-2c9bffed4058", + "rows": [ + [ + "0", + "0001_01", + "Europa", + "False", + "B/0/P", + "TRAPPIST-1e", + "39.0", + "False", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "Maham Ofracculy", + "False" + ], + [ + "1", + "0002_01", + "Earth", + "False", + "F/0/S", + "TRAPPIST-1e", + "24.0", + "False", + "109.0", + "9.0", + "25.0", + "549.0", + "44.0", + "Juanna Vines", + "True" + ], + [ + "2", + "0003_01", + "Europa", + "False", + "A/0/S", + "TRAPPIST-1e", + "58.0", + "True", + "43.0", + "3576.0", + "0.0", + "6715.0", + "49.0", + "Altark Susent", + "False" + ], + [ + "3", + "0003_02", + "Europa", + 
"False", + "A/0/S", + "TRAPPIST-1e", + "33.0", + "False", + "0.0", + "1283.0", + "371.0", + "3329.0", + "193.0", + "Solam Susent", + "False" + ], + [ + "4", + "0004_01", + "Earth", + "False", + "F/1/S", + "TRAPPIST-1e", + "16.0", + "False", + "303.0", + "70.0", + "151.0", + "565.0", + "2.0", + "Willy Santantines", + "True" + ] + ], + "shape": { + "columns": 14, + "rows": 5 + } + }, "text/html": [ "
\n", "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdHomePlanetCryoSleepCabinDestinationAgeVIPRoomServiceFoodCourtShoppingMallSpaVRDeckNameTransportedCabin_letter
00001_01EuropaFalseB/0/PTRAPPIST-1e39.0False0.00.00.00.00.0Maham OfracculyFalseB
10002_01EarthFalseF/0/STRAPPIST-1e24.0False109.09.025.0549.044.0Juanna VinesTrueF
20003_01EuropaFalseA/0/STRAPPIST-1e58.0True43.03576.00.06715.049.0Altark SusentFalseA
30003_02EuropaFalseA/0/STRAPPIST-1e33.0False0.01283.0371.03329.0193.0Solam SusentFalseA
40004_01EarthFalseF/1/STRAPPIST-1e16.0False303.070.0151.0565.02.0Willy SantantinesTrueF
\n", + "
" + ], + "text/plain": [ + " PassengerId HomePlanet CryoSleep Cabin Destination Age VIP \\\n", + "0 0001_01 Europa False B/0/P TRAPPIST-1e 39.0 False \n", + "1 0002_01 Earth False F/0/S TRAPPIST-1e 24.0 False \n", + "2 0003_01 Europa False A/0/S TRAPPIST-1e 58.0 True \n", + "3 0003_02 Europa False A/0/S TRAPPIST-1e 33.0 False \n", + "4 0004_01 Earth False F/1/S TRAPPIST-1e 16.0 False \n", + "\n", + " RoomService FoodCourt ShoppingMall Spa VRDeck Name \\\n", + "0 0.0 0.0 0.0 0.0 0.0 Maham Ofracculy \n", + "1 109.0 9.0 25.0 549.0 44.0 Juanna Vines \n", + "2 43.0 3576.0 0.0 6715.0 49.0 Altark Susent \n", + "3 0.0 1283.0 371.0 3329.0 193.0 Solam Susent \n", + "4 303.0 70.0 151.0 565.0 2.0 Willy Santantines \n", + "\n", + " Transported Cabin_letter \n", + "0 False B \n", + "1 True F \n", + "2 False A \n", + "3 False A \n", + "4 True F " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [
+ "# Cabin values such as \"B/0/P\" are too granular to model directly, so keep\n",
+ "# only the leading letter as a new column (a missing Cabin stays NaN).\n",
+ "spaceship['Cabin_letter'] = spaceship['Cabin'].str[0]\n",
+ "\n",
+ "# Only the last expression of a cell is displayed, so preview the frame once.\n",
+ "spaceship.head()"
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "HomePlanet", + "rawType": "object", + "type": "string" + }, + { + "name": "CryoSleep", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Cabin", + "rawType": "object", + "type": "string" + }, + { + "name": "Destination", + "rawType": "object", + "type": "string" + }, + { + "name": "Age", + "rawType": "float64", + "type": "float" + }, + { + "name": "VIP", + "rawType": "object", + "type": "unknown" + }, + { + "name": "RoomService", + "rawType": "float64", + "type": "float" + }, + { + "name": 
"FoodCourt", + "rawType": "float64", + "type": "float" + }, + { + "name": "ShoppingMall", + "rawType": "float64", + "type": "float" + }, + { + "name": "Spa", + "rawType": "float64", + "type": "float" + }, + { + "name": "VRDeck", + "rawType": "float64", + "type": "float" + }, + { + "name": "Transported", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter", + "rawType": "object", + "type": "string" + } + ], + "ref": "42eb2bba-2088-4d04-a78c-5186bd1706ec", + "rows": [ + [ + "0", + "Europa", + "False", + "B/0/P", + "TRAPPIST-1e", + "39.0", + "False", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "False", + "B" + ], + [ + "1", + "Earth", + "False", + "F/0/S", + "TRAPPIST-1e", + "24.0", + "False", + "109.0", + "9.0", + "25.0", + "549.0", + "44.0", + "True", + "F" + ], + [ + "2", + "Europa", + "False", + "A/0/S", + "TRAPPIST-1e", + "58.0", + "True", + "43.0", + "3576.0", + "0.0", + "6715.0", + "49.0", + "False", + "A" + ], + [ + "3", + "Europa", + "False", + "A/0/S", + "TRAPPIST-1e", + "33.0", + "False", + "0.0", + "1283.0", + "371.0", + "3329.0", + "193.0", + "False", + "A" + ], + [ + "4", + "Earth", + "False", + "F/1/S", + "TRAPPIST-1e", + "16.0", + "False", + "303.0", + "70.0", + "151.0", + "565.0", + "2.0", + "True", + "F" + ] + ], + "shape": { + "columns": 13, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HomePlanetCryoSleepCabinDestinationAgeVIPRoomServiceFoodCourtShoppingMallSpaVRDeckTransportedCabin_letter
0EuropaFalseB/0/PTRAPPIST-1e39.0False0.00.00.00.00.0FalseB
1EarthFalseF/0/STRAPPIST-1e24.0False109.09.025.0549.044.0TrueF
2EuropaFalseA/0/STRAPPIST-1e58.0True43.03576.00.06715.049.0FalseA
3EuropaFalseA/0/STRAPPIST-1e33.0False0.01283.0371.03329.0193.0FalseA
4EarthFalseF/1/STRAPPIST-1e16.0False303.070.0151.0565.02.0TrueF
\n", + "
" + ], + "text/plain": [ + " HomePlanet CryoSleep Cabin Destination Age VIP RoomService \\\n", + "0 Europa False B/0/P TRAPPIST-1e 39.0 False 0.0 \n", + "1 Earth False F/0/S TRAPPIST-1e 24.0 False 109.0 \n", + "2 Europa False A/0/S TRAPPIST-1e 58.0 True 43.0 \n", + "3 Europa False A/0/S TRAPPIST-1e 33.0 False 0.0 \n", + "4 Earth False F/1/S TRAPPIST-1e 16.0 False 303.0 \n", + "\n", + " FoodCourt ShoppingMall Spa VRDeck Transported Cabin_letter \n", + "0 0.0 0.0 0.0 0.0 False B \n", + "1 9.0 25.0 549.0 44.0 True F \n", + "2 3576.0 0.0 6715.0 49.0 False A \n", + "3 1283.0 371.0 3329.0 193.0 False A \n", + "4 70.0 151.0 565.0 2.0 True F " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [
+ "# Drop the PassengerId and Name columns, then preview what is left.\n",
+ "spaceship = spaceship.drop(['PassengerId', 'Name'], axis=1)\n",
+ "spaceship.head()"
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "HomePlanet", + "rawType": "object", + "type": "string" + }, + { + "name": "CryoSleep", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Destination", + "rawType": "object", + "type": "string" + }, + { + "name": "Age", + "rawType": "float64", + "type": "float" + }, + { + "name": "VIP", + "rawType": "object", + "type": "unknown" + }, + { + "name": "RoomService", + "rawType": "float64", + "type": "float" + }, + { + "name": "FoodCourt", + "rawType": "float64", + "type": "float" + }, + { + "name": "ShoppingMall", + "rawType": "float64", + "type": "float" + }, + { + "name": "Spa", + "rawType": "float64", + "type": "float" + }, + { + "name": "VRDeck", + "rawType": "float64", + "type": "float" + }, + { + "name": "Transported", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter", + "rawType": "object", + "type": "string" + } + ], + "ref": 
"d294673b-1cbf-4d31-9966-c30ef64a81d9", + "rows": [ + [ + "0", + "Europa", + "False", + "TRAPPIST-1e", + "39.0", + "False", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "False", + "B" + ], + [ + "1", + "Earth", + "False", + "TRAPPIST-1e", + "24.0", + "False", + "109.0", + "9.0", + "25.0", + "549.0", + "44.0", + "True", + "F" + ], + [ + "2", + "Europa", + "False", + "TRAPPIST-1e", + "58.0", + "True", + "43.0", + "3576.0", + "0.0", + "6715.0", + "49.0", + "False", + "A" + ], + [ + "3", + "Europa", + "False", + "TRAPPIST-1e", + "33.0", + "False", + "0.0", + "1283.0", + "371.0", + "3329.0", + "193.0", + "False", + "A" + ], + [ + "4", + "Earth", + "False", + "TRAPPIST-1e", + "16.0", + "False", + "303.0", + "70.0", + "151.0", + "565.0", + "2.0", + "True", + "F" + ] + ], + "shape": { + "columns": 12, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HomePlanetCryoSleepDestinationAgeVIPRoomServiceFoodCourtShoppingMallSpaVRDeckTransportedCabin_letter
0EuropaFalseTRAPPIST-1e39.0False0.00.00.00.00.0FalseB
1EarthFalseTRAPPIST-1e24.0False109.09.025.0549.044.0TrueF
2EuropaFalseTRAPPIST-1e58.0True43.03576.00.06715.049.0FalseA
3EuropaFalseTRAPPIST-1e33.0False0.01283.0371.03329.0193.0FalseA
4EarthFalseTRAPPIST-1e16.0False303.070.0151.0565.02.0TrueF
\n", + "
" + ], + "text/plain": [ + " HomePlanet CryoSleep Destination Age VIP RoomService FoodCourt \\\n", + "0 Europa False TRAPPIST-1e 39.0 False 0.0 0.0 \n", + "1 Earth False TRAPPIST-1e 24.0 False 109.0 9.0 \n", + "2 Europa False TRAPPIST-1e 58.0 True 43.0 3576.0 \n", + "3 Europa False TRAPPIST-1e 33.0 False 0.0 1283.0 \n", + "4 Earth False TRAPPIST-1e 16.0 False 303.0 70.0 \n", + "\n", + " ShoppingMall Spa VRDeck Transported Cabin_letter \n", + "0 0.0 0.0 0.0 False B \n", + "1 25.0 549.0 44.0 True F \n", + "2 0.0 6715.0 49.0 False A \n", + "3 371.0 3329.0 193.0 False A \n", + "4 151.0 565.0 2.0 True F " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [
+ "# The raw Cabin column is no longer needed now that Cabin_letter exists.\n",
+ "spaceship = spaceship.drop(['Cabin'], axis=1)\n",
+ "spaceship.head()"
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Age", + "rawType": "float64", + "type": "float" + }, + { + "name": "RoomService", + "rawType": "float64", + "type": "float" + }, + { + "name": "FoodCourt", + "rawType": "float64", + "type": "float" + }, + { + "name": "ShoppingMall", + "rawType": "float64", + "type": "float" + }, + { + "name": "Spa", + "rawType": "float64", + "type": "float" + }, + { + "name": "VRDeck", + "rawType": "float64", + "type": "float" + }, + { + "name": "Transported", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "HomePlanet_Europa", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "HomePlanet_Mars", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "CryoSleep_True", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Destination_PSO J318.5-22", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Destination_TRAPPIST-1e", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "VIP_True", 
"rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter_B", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter_C", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter_D", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter_E", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter_F", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter_G", + "rawType": "bool", + "type": "boolean" + }, + { + "name": "Cabin_letter_T", + "rawType": "bool", + "type": "boolean" + } + ], + "ref": "23ee4627-e934-415a-8c3a-2428faebe6f8", + "rows": [ + [ + "0", + "39.0", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "False", + "True", + "False", + "False", + "False", + "True", + "False", + "True", + "False", + "False", + "False", + "False", + "False", + "False" + ], + [ + "1", + "24.0", + "109.0", + "9.0", + "25.0", + "549.0", + "44.0", + "True", + "False", + "False", + "False", + "False", + "True", + "False", + "False", + "False", + "False", + "False", + "True", + "False", + "False" + ], + [ + "2", + "58.0", + "43.0", + "3576.0", + "0.0", + "6715.0", + "49.0", + "False", + "True", + "False", + "False", + "False", + "True", + "True", + "False", + "False", + "False", + "False", + "False", + "False", + "False" + ], + [ + "3", + "33.0", + "0.0", + "1283.0", + "371.0", + "3329.0", + "193.0", + "False", + "True", + "False", + "False", + "False", + "True", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False" + ], + [ + "4", + "16.0", + "303.0", + "70.0", + "151.0", + "565.0", + "2.0", + "True", + "False", + "False", + "False", + "False", + "True", + "False", + "False", + "False", + "False", + "False", + "True", + "False", + "False" + ] + ], + "shape": { + "columns": 20, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeRoomServiceFoodCourtShoppingMallSpaVRDeckTransportedHomePlanet_EuropaHomePlanet_MarsCryoSleep_TrueDestination_PSO J318.5-22Destination_TRAPPIST-1eVIP_TrueCabin_letter_BCabin_letter_CCabin_letter_DCabin_letter_ECabin_letter_FCabin_letter_GCabin_letter_T
039.00.00.00.00.00.0FalseTrueFalseFalseFalseTrueFalseTrueFalseFalseFalseFalseFalseFalse
124.0109.09.025.0549.044.0TrueFalseFalseFalseFalseTrueFalseFalseFalseFalseFalseTrueFalseFalse
258.043.03576.00.06715.049.0FalseTrueFalseFalseFalseTrueTrueFalseFalseFalseFalseFalseFalseFalse
333.00.01283.0371.03329.0193.0FalseTrueFalseFalseFalseTrueFalseFalseFalseFalseFalseFalseFalseFalse
416.0303.070.0151.0565.02.0TrueFalseFalseFalseFalseTrueFalseFalseFalseFalseFalseTrueFalseFalse
\n", + "
" + ], + "text/plain": [ + " Age RoomService FoodCourt ShoppingMall Spa VRDeck Transported \\\n", + "0 39.0 0.0 0.0 0.0 0.0 0.0 False \n", + "1 24.0 109.0 9.0 25.0 549.0 44.0 True \n", + "2 58.0 43.0 3576.0 0.0 6715.0 49.0 False \n", + "3 33.0 0.0 1283.0 371.0 3329.0 193.0 False \n", + "4 16.0 303.0 70.0 151.0 565.0 2.0 True \n", + "\n", + " HomePlanet_Europa HomePlanet_Mars CryoSleep_True \\\n", + "0 True False False \n", + "1 False False False \n", + "2 True False False \n", + "3 True False False \n", + "4 False False False \n", + "\n", + " Destination_PSO J318.5-22 Destination_TRAPPIST-1e VIP_True \\\n", + "0 False True False \n", + "1 False True False \n", + "2 False True True \n", + "3 False True False \n", + "4 False True False \n", + "\n", + " Cabin_letter_B Cabin_letter_C Cabin_letter_D Cabin_letter_E \\\n", + "0 True False False False \n", + "1 False False False False \n", + "2 False False False False \n", + "3 False False False False \n", + "4 False False False False \n", + "\n", + " Cabin_letter_F Cabin_letter_G Cabin_letter_T \n", + "0 False False False \n", + "1 True False False \n", + "2 False False False \n", + "3 False False False \n", + "4 True False False " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [
+ "# One-hot encode every non-numeric column.\n",
+ "# drop_first=True drops one level per column, which becomes the baseline.\n",
+ "# dummy_na=True adds an explicit \"<column>_nan\" indicator: without it a\n",
+ "# missing category is encoded as an all-False row, which is indistinguishable\n",
+ "# from that baseline level. For a column with no missing values the indicator\n",
+ "# is simply all False.\n",
+ "spaceship = pd.get_dummies(spaceship, drop_first=True, dummy_na=True)\n",
+ "\n",
+ "# Preview the encoded frame.\n",
+ "spaceship.head()"
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "object", + "type": "string" + }, + { + "name": "0", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "779bbc04-a1b3-43d0-9607-1a151b365de8", + "rows": [ + [ + "Age", + "179" + ], + [ + "RoomService", + "181" + ], + [ + "FoodCourt", + "183" + ], + [ + "ShoppingMall", 
+ "208" + ], + [ + "Spa", + "183" + ], + [ + "VRDeck", + "188" + ], + [ + "Transported", + "0" + ], + [ + "HomePlanet_Europa", + "0" + ], + [ + "HomePlanet_Mars", + "0" + ], + [ + "CryoSleep_True", + "0" + ], + [ + "Destination_PSO J318.5-22", + "0" + ], + [ + "Destination_TRAPPIST-1e", + "0" + ], + [ + "VIP_True", + "0" + ], + [ + "Cabin_letter_B", + "0" + ], + [ + "Cabin_letter_C", + "0" + ], + [ + "Cabin_letter_D", + "0" + ], + [ + "Cabin_letter_E", + "0" + ], + [ + "Cabin_letter_F", + "0" + ], + [ + "Cabin_letter_G", + "0" + ], + [ + "Cabin_letter_T", + "0" + ] + ], + "shape": { + "columns": 1, + "rows": 20 + } + }, + "text/plain": [ + "Age 179\n", + "RoomService 181\n", + "FoodCourt 183\n", + "ShoppingMall 208\n", + "Spa 183\n", + "VRDeck 188\n", + "Transported 0\n", + "HomePlanet_Europa 0\n", + "HomePlanet_Mars 0\n", + "CryoSleep_True 0\n", + "Destination_PSO J318.5-22 0\n", + "Destination_TRAPPIST-1e 0\n", + "VIP_True 0\n", + "Cabin_letter_B 0\n", + "Cabin_letter_C 0\n", + "Cabin_letter_D 0\n", + "Cabin_letter_E 0\n", + "Cabin_letter_F 0\n", + "Cabin_letter_G 0\n", + "Cabin_letter_T 0\n", + "dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spaceship.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nulos restantes: 0\n" + ] + } + ], + "source": [
+ "# Fill the remaining numeric gaps without leaking information.\n",
+ "# The target is split off first and the rows are partitioned BEFORE any\n",
+ "# preprocessing, so the scaler and imputer are fitted on training rows only.\n",
+ "from sklearn.impute import KNNImputer\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "# 1. Separate features and target. The target is never scaled or imputed:\n",
+ "#    standardising it would turn the 0/1 labels into arbitrary floats.\n",
+ "X = spaceship.drop(columns=['Transported'])\n",
+ "y = spaceship['Transported'].astype(int)  # True -> 1, False -> 0\n",
+ "\n",
+ "# 2. Hold out 20% of the rows for testing (fixed seed for reproducibility).\n",
+ "X_train_raw, X_test_raw, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.2, random_state=42\n",
+ ")\n",
+ "\n",
+ "# 3. Scale before KNNImputer so that large-valued columns (such as Spa) do\n",
+ "#    not dominate the neighbour distances. StandardScaler ignores NaN when\n",
+ "#    fitting and keeps it when transforming.\n",
+ "scaler = StandardScaler()\n",
+ "X_train_scaled = scaler.fit_transform(X_train_raw)\n",
+ "X_test_scaled = scaler.transform(X_test_raw)\n",
+ "\n",
+ "# 4. Fill each gap from the 5 nearest neighbours found in the training rows.\n",
+ "imputer = KNNImputer(n_neighbors=5)\n",
+ "X_train_imputed = imputer.fit_transform(X_train_scaled)\n",
+ "X_test_imputed = imputer.transform(X_test_scaled)\n",
+ "\n",
+ "# Both results are NumPy arrays; count whatever NaN is left in either one.\n",
+ "remaining_nulls = int(pd.isna(X_train_imputed).sum() + pd.isna(X_test_imputed).sum())\n",
+ "print(f\"Nulos restantes: {remaining_nulls}\")"
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -221,73 +2334,308 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Número de features originales: 19\n", + "Número de features seleccionadas: 10\n" + ] + } + ], "source": [ - "#your code here"
+ "#your code here\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.feature_selection import SelectFromModel\n",
+ "\n",
+ "# Rank the features with a random forest fitted on the training rows only and\n",
+ "# keep those whose importance is at or above the median importance.\n",
+ "# The data is already standardised, so it is not scaled a second time.\n",
+ "selector = SelectFromModel(\n",
+ "    RandomForestClassifier(n_estimators=100, random_state=42),\n",
+ "    threshold=\"median\"\n",
+ ")\n",
+ "selector.fit(X_train_imputed, y_train)\n",
+ "\n",
+ "# Apply the same column mask to both partitions.\n",
+ "X_train = selector.transform(X_train_imputed)\n",
+ "X_test = selector.transform(X_test_imputed)\n",
+ "\n",
+ "print(f\"Número de features originales: {X_train_imputed.shape[1]}\")\n",
+ "print(f\"Número de features seleccionadas: {X_train.shape[1]}\")"
 ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 14, "metadata": {}, + "outputs": [], "source": [ - "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters."
+ "# Sanity check: the labels must be plain 0/1 integers before modelling.\n",
+ "assert set(y.unique()) <= {0, 1}, \"Transported must be encoded as 0/1\""
 ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🎯 Tamaño de los conjuntos de entrenamiento y prueba:\n", + "--------------------------------------------------\n", + "X_train: (6954, 10)\n", + "X_test : (1739, 10)\n", + "y_train: (6954,)\n", + "y_test : (1739,)\n", + "--------------------------------------------------\n" + ] + } + ], "source": [ - "#your code here"
+ "# Perform Train Test Split\n",
+ "# The 80/20 split itself happens before imputation and feature selection so\n",
+ "# that those steps never see the test rows; this cell only reports the sizes.\n",
+ "print(\"🎯 Tamaño de los conjuntos de entrenamiento y prueba:\")\n",
+ "print(\"-\" * 50)\n",
+ "print(f\"X_train: {X_train.shape}\")\n",
+ "print(f\"X_test : {X_test.shape}\")\n",
+ "print(f\"y_train: {y_train.shape}\")\n",
+ "print(f\"y_test : {y_test.shape}\")\n",
+ "print(\"-\" * 50)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "- Evaluate your model" + "- Now let's use the best model we got so far in order to see how it can improve when we fine-tune its hyperparameters." 
] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🏆 Gradient Boosting Classifier\n", + "--------------------------------------------------\n", + "Accuracy: 0.7913\n", + "R2 Score: 0.1650\n", + "MAE: 0.2087\n", + "\n", + "📊 Matriz de Confusión:\n", + "[[631 230]\n", + " [133 745]]\n", + "\n", + "📄 Reporte de Clasificación:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.83 0.73 0.78 861\n", + " 1 0.76 0.85 0.80 878\n", + "\n", + " accuracy 0.79 1739\n", + " macro avg 0.80 0.79 0.79 1739\n", + "weighted avg 0.79 0.79 0.79 1739\n", + "\n", + "--------------------------------------------------\n" + ] + } + ], "source": [ - "#your code here"
+ "#your code here\n",
+ "from sklearn.ensemble import GradientBoostingClassifier\n",
+ "from sklearn.metrics import (\n",
+ "    accuracy_score,\n",
+ "    classification_report,\n",
+ "    confusion_matrix,\n",
+ "    f1_score,\n",
+ "    roc_auc_score,\n",
+ ")\n",
+ "\n",
+ "# Baseline model: gradient boosting with hand-picked hyperparameters.\n",
+ "gb = GradientBoostingClassifier(\n",
+ "    n_estimators=200,\n",
+ "    learning_rate=0.1,\n",
+ "    max_depth=3,\n",
+ "    random_state=42\n",
+ ")\n",
+ "\n",
+ "# Train on the training partition and predict the held-out rows.\n",
+ "gb.fit(X_train, y_train)\n",
+ "y_pred_gb = gb.predict(X_test)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Grid/Random Search**" + "- Evaluate your model" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🏆 Gradient Boosting Classifier\n", + "--------------------------------------------------\n", + "Accuracy: 0.7913\n", + "R2 Score: 0.1650\n", + "MAE: 0.2087\n", + "\n", + "📊 Matriz de Confusión:\n", + "[[631 230]\n", + " [133 745]]\n", + "\n", + "📄 Reporte de Clasificación:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.83 0.73 0.78 861\n", + " 1 0.76 0.85 0.80 878\n", + "\n", + " accuracy 0.79 1739\n", + " macro avg 0.80 0.79 0.79 1739\n", + "weighted avg 0.79 0.79 0.79 1739\n", + "\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [
+ "#your code here\n",
+ "def evaluate_classifier(name, model, X, y_true):\n",
+ "    \"\"\"Print a classification report for `model` and return its headline metrics.\n",
+ "\n",
+ "    R2 and MAE are regression metrics (on 0/1 labels MAE is just 1 - accuracy),\n",
+ "    so F1 and ROC AUC are reported next to accuracy instead.\n",
+ "\n",
+ "    Parameters: `name` labels the report, `model` is a fitted classifier that\n",
+ "    provides predict and predict_proba, `X` / `y_true` are the rows to score.\n",
+ "    Returns a dict with the keys 'Método', 'Accuracy', 'F1' and 'ROC AUC'.\n",
+ "    \"\"\"\n",
+ "    y_hat = model.predict(X)\n",
+ "    # Column 1 of predict_proba is the probability of the positive class (1).\n",
+ "    positive_proba = model.predict_proba(X)[:, 1]\n",
+ "    scores = {\n",
+ "        'Método': name,\n",
+ "        'Accuracy': accuracy_score(y_true, y_hat),\n",
+ "        'F1': f1_score(y_true, y_hat),\n",
+ "        'ROC AUC': roc_auc_score(y_true, positive_proba),\n",
+ "    }\n",
+ "\n",
+ "    print(f\"🏆 {name}\")\n",
+ "    print(\"-\" * 50)\n",
+ "    print(f\"Accuracy: {scores['Accuracy']:.4f}\")\n",
+ "    print(f\"F1 Score: {scores['F1']:.4f}\")\n",
+ "    print(f\"ROC AUC: {scores['ROC AUC']:.4f}\")\n",
+ "\n",
+ "    print(\"\\n📊 Matriz de Confusión:\")\n",
+ "    print(confusion_matrix(y_true, y_hat))\n",
+ "\n",
+ "    print(\"\\n📄 Reporte de Clasificación:\")\n",
+ "    print(classification_report(y_true, y_hat))\n",
+ "    print(\"-\" * 50)\n",
+ "    return scores\n",
+ "\n",
+ "\n",
+ "baseline_results = evaluate_classifier(\"Gradient Boosting Classifier\", gb, X_test, y_test)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "For this lab we will use Grid Search." + "**Grid/Random Search**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "- Define hyperparameters to fine tune." + "For this lab we will use Grid Search." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mejor Accuracy: 0.7918\n", + "Mejor R2: 0.1673\n", + "Mejor MAE: 0.2082\n" + ] + } + ], "source": [ - "#your code here"
+ "#your code here\n",
+ "from sklearn.model_selection import RandomizedSearchCV\n",
+ "\n",
+ "# 1. Search space for the coarse random search.\n",
+ "param_dist = {\n",
+ "    'n_estimators': [100, 300, 500],       # number of trees\n",
+ "    'learning_rate': [0.01, 0.05, 0.1],    # shrinkage applied to each tree\n",
+ "    'max_depth': [3, 4, 5, 6],             # depth of each tree\n",
+ "    'min_samples_split': [2, 5, 10],       # minimum samples needed to split a node\n",
+ "    'subsample': [0.8, 0.9, 1.0]           # fraction of rows used to fit each tree\n",
+ "}\n",
+ "\n",
+ "# 2. Try only n_iter=10 randomly sampled combinations, each scored with\n",
+ "#    5-fold cross-validation on the training partition.\n",
+ "random_search = RandomizedSearchCV(\n",
+ "    estimator=GradientBoostingClassifier(random_state=42),\n",
+ "    param_distributions=param_dist,\n",
+ "    n_iter=10,\n",
+ "    cv=5,\n",
+ "    n_jobs=-1,\n",
+ "    scoring='accuracy',\n",
+ "    random_state=42\n",
+ ")\n",
+ "\n",
+ "# 3. Run the search.\n",
+ "random_search.fit(X_train, y_train)\n",
+ "\n",
+ "# 4. best_estimator_ is the winning configuration, ready to predict.\n",
+ "best_gb_model = random_search.best_estimator_\n",
+ "print(f\"Mejores parámetros: {random_search.best_params_}\")\n",
+ "random_results = evaluate_classifier(\"Random Search\", best_gb_model, X_test, y_test)"
 ] }, { @@ -299,10 +2647,80 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🚀 Iniciando refinamiento con Grid Search...\n", + "\n", + "==============================\n", + "🏆 MODELO OPTIMIZADO FINAL\n", + "==============================\n", + "Mejores parámetros: {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 400, 'subsample': 0.9}\n", + "Accuracy: 0.7878\n", + "R2 Score: 0.1512\n", + "MAE: 0.2122\n", + "\n", + "📊 Matriz de Confusión:\n", + "[[618 243]\n", + " [126 752]]\n", + "\n", + "📄 Reporte de Clasificación:\n", + " precision recall f1-score support\n", + "\n", + " 0 0.83 0.72 0.77 861\n", + " 1 0.76 0.86 0.80 878\n", + "\n", + " accuracy 0.79 1739\n", + " macro avg 0.79 0.79 0.79 1739\n", + "weighted avg 0.79 0.79 0.79 1739\n", + "\n" + ] + } + ], + "source": [
+ "from sklearn.model_selection import GridSearchCV\n",
+ "\n",
+ "# 1. A finer, hand-picked grid to refine the coarse random search.\n",
+ "param_grid = {\n",
+ "    'n_estimators': [300, 400, 500],\n",
+ "    'learning_rate': [0.01, 0.05],\n",
+ "    'max_depth': [4, 5],\n",
+ "    'subsample': [0.8, 0.9]\n",
+ "}\n",
+ "\n",
+ "# 2. Exhaustive search over every combination of this small grid.\n",
+ "grid_search = GridSearchCV(\n",
+ "    estimator=GradientBoostingClassifier(random_state=42),\n",
+ "    param_grid=param_grid,\n",
+ "    cv=5,\n",
+ "    n_jobs=-1,\n",
+ "    scoring='accuracy'\n",
+ ")\n",
+ "print(\"🚀 Iniciando refinamiento con Grid Search...\")\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "# 3. Evaluate the best model on the held-out rows. The results go into their\n",
+ "#    own variable so the random-search results are not overwritten.\n",
+ "best_model = grid_search.best_estimator_\n",
+ "print(\"\\n\" + \"=\"*30)\n",
+ "print(\"🏆 MODELO OPTIMIZADO FINAL\")\n",
+ "print(\"=\"*30)\n",
+ "print(f\"Mejores parámetros: {grid_search.best_params_}\")\n",
+ "grid_results = evaluate_classifier(\"Grid Search\", best_model, X_test, y_test)"
+ ] }, { "cell_type": "markdown", @@ -313,10 +2731,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Método Accuracy R2 MAE\n", + "0 Random Search 0.7918 0.1673 0.2082\n", + "1 Grid Search 0.7878 0.1512 0.2122\n" + ] + } + ], + "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Build the comparison from the metrics computed above instead of copying\n",
+ "# numbers by hand, so the table cannot drift from the actual results.\n",
+ "df_comparativa = pd.DataFrame([baseline_results, random_results, grid_results])\n",
+ "\n",
+ "# Show the table.\n",
+ "print(df_comparativa)\n"
+ ] } ], "metadata": { @@ -335,7 +2787,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.12.9" } }, "nbformat": 4,