diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..262e202 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -52,6 +52,177 @@ "outputs": [ { "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "PassengerId", + "rawType": "object", + "type": "string" + }, + { + "name": "HomePlanet", + "rawType": "object", + "type": "string" + }, + { + "name": "CryoSleep", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Cabin", + "rawType": "object", + "type": "string" + }, + { + "name": "Destination", + "rawType": "object", + "type": "string" + }, + { + "name": "Age", + "rawType": "float64", + "type": "float" + }, + { + "name": "VIP", + "rawType": "object", + "type": "unknown" + }, + { + "name": "RoomService", + "rawType": "float64", + "type": "float" + }, + { + "name": "FoodCourt", + "rawType": "float64", + "type": "float" + }, + { + "name": "ShoppingMall", + "rawType": "float64", + "type": "float" + }, + { + "name": "Spa", + "rawType": "float64", + "type": "float" + }, + { + "name": "VRDeck", + "rawType": "float64", + "type": "float" + }, + { + "name": "Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Transported", + "rawType": "bool", + "type": "boolean" + } + ], + "ref": "b895d310-201e-45f5-a169-0f6e69208be9", + "rows": [ + [ + "0", + "0001_01", + "Europa", + "False", + "B/0/P", + "TRAPPIST-1e", + "39.0", + "False", + "0.0", + "0.0", + "0.0", + "0.0", + "0.0", + "Maham Ofracculy", + "False" + ], + [ + "1", + "0002_01", + "Earth", + "False", + "F/0/S", + "TRAPPIST-1e", + "24.0", + "False", + "109.0", + "9.0", + "25.0", + "549.0", + "44.0", + "Juanna Vines", + "True" + ], + [ + "2", + "0003_01", + "Europa", + "False", + "A/0/S", + "TRAPPIST-1e", + "58.0", + "True", + "43.0", + "3576.0", + "0.0", + "6715.0", + "49.0", + "Altark Susent", + "False" + ], + [ + "3", + "0003_02", + "Europa", + 
"False", + "A/0/S", + "TRAPPIST-1e", + "33.0", + "False", + "0.0", + "1283.0", + "371.0", + "3329.0", + "193.0", + "Solam Susent", + "False" + ], + [ + "4", + "0004_01", + "Earth", + "False", + "F/1/S", + "TRAPPIST-1e", + "16.0", + "False", + "303.0", + "70.0", + "151.0", + "565.0", + "2.0", + "Willy Santantines", + "True" + ] + ], + "shape": { + "columns": 14, + "rows": 5 + } + }, "text/html": [ "
\n", "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HomePlanetCryoSleepDestinationAgeVIPRoomServiceFoodCourtShoppingMallSpaVRDeckTransported
010239.000.00.00.00.00.00
100224.00109.09.025.0549.044.01
210258.0143.03576.00.06715.049.00
310233.000.01283.0371.03329.0193.00
400216.00303.070.0151.0565.02.01
\n", + "
" + ], + "text/plain": [ + " HomePlanet CryoSleep Destination Age VIP RoomService FoodCourt \\\n", + "0 1 0 2 39.0 0 0.0 0.0 \n", + "1 0 0 2 24.0 0 109.0 9.0 \n", + "2 1 0 2 58.0 1 43.0 3576.0 \n", + "3 1 0 2 33.0 0 0.0 1283.0 \n", + "4 0 0 2 16.0 0 303.0 70.0 \n", + "\n", + " ShoppingMall Spa VRDeck Transported \n", + "0 0.0 0.0 0.0 0 \n", + "1 25.0 549.0 44.0 1 \n", + "2 0.0 6715.0 49.0 0 \n", + "3 371.0 3329.0 193.0 0 \n", + "4 151.0 565.0 2.0 1 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code here" + "spaceship.head()" ] }, { @@ -253,11 +730,51 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: Random Forest | Success: 0.7786\n", + "Model: Gradient Boosting | Success: 0.7855\n", + "Model: AdaBoost | Success: 0.7614\n", + "Success (Accuracy) initial of the model is: 0.7855\n" + ] + } + ], "source": [ - "#your code here" + "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier\n", + "\n", + "# 1. Definimos los candidatos\n", + "models = {\n", + " \"Random Forest\": RandomForestClassifier(random_state=42),\n", + " \"Gradient Boosting\": GradientBoostingClassifier(random_state=42),\n", + " \"AdaBoost\": AdaBoostClassifier(random_state=42)\n", + "}\n", + "\n", + "# 2. Entrenamos y evaluamos a cada uno\n", + "for name, model in models.items():\n", + " model.fit(X_train, y_train)\n", + " score = model.score(X_test, y_test)\n", + " print(f\"Model: {name} | Success: {score:.4f}\")\n", + "\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "# 1. Definimos el modelo ganador con sus ajustes de fábrica\n", + "gb_model = GradientBoostingClassifier(random_state=42)\n", + "\n", + "# 2. El modelo estudia los datos de entrenamiento\n", + "gb_model.fit(X_train, y_train)\n", + "\n", + "# 3. 
El modelo hace el examen (predice sobre X_test)\n", + "y_pred = gb_model.predict(X_test)\n", + "\n", + "# 4. Comparamos sus respuestas con las reales para sacar la nota\n", + "baseline_accuracy = accuracy_score(y_test, y_pred)\n", + "\n", + "print(f\"Success (Accuracy) initial of the model is: {baseline_accuracy:.4f}\")" ] }, { @@ -283,11 +800,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "# Hyperparameter menu for GridSearchCV\n", + "param_grid = {\n", + " 'n_estimators': [100, 150, 200],\n", + " 'learning_rate': [0.05, 0.1, 0.15],\n", + " 'max_depth': [3, 4, 5], \n", + " 'subsample': [0.8, 1.0] \n", + "}" ] }, { @@ -299,10 +822,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 54 candidates, totalling 270 fits\n", + "Final search complete!\n", + "The best parameters are: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 150, 'subsample': 0.8}\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "# 1. Creamos el buscador automático\n", + "# cv=5 hace que el modelo sea más robusto (validación cruzada)\n", + "grid_search = GridSearchCV(estimator=gb_model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)\n", + "\n", + "# 2. Empezamos la búsqueda (esto puede tardar un poco)\n", + "grid_search.fit(X_train, y_train)\n", + "\n", + "# 3. 
Keep the best model found\n", + "best_model = grid_search.best_estimator_\n", + "\n", + "print(\"Final search complete!\")\n", + "print(\"The best parameters are:\", grid_search.best_params_)" + ] }, { "cell_type": "markdown", @@ -313,15 +861,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initial success (Baseline): 0.7855\n", + "Final success (Tuned): 0.7890\n", + "Total improvement: 0.35%\n" + ] + } + ], + "source": [ + "# 1. Predict on the test set with the best model found by the search\n", + "final_predictions = best_model.predict(X_test)\n", + "\n", + "# 2. Compute the tuned model's accuracy\n", + "final_accuracy = accuracy_score(y_test, final_predictions)\n", + "\n", + "# 3. Compare with the baseline (baseline_accuracy is set in the baseline cell)\n", + "print(f\"Initial success (Baseline): {baseline_accuracy:.4f}\")\n", + "print(f\"Final success (Tuned): {final_accuracy:.4f}\")\n", + "print(f\"Total improvement: {(final_accuracy - baseline_accuracy)*100:.2f}%\")" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -335,7 +904,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.12.10" } }, "nbformat": 4,