diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb
index 847d487..262e202 100644
--- a/lab-hyper-tuning.ipynb
+++ b/lab-hyper-tuning.ipynb
@@ -52,6 +52,177 @@
"outputs": [
{
"data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "PassengerId",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "HomePlanet",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "CryoSleep",
+ "rawType": "object",
+ "type": "unknown"
+ },
+ {
+ "name": "Cabin",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Destination",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Age",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VIP",
+ "rawType": "object",
+ "type": "unknown"
+ },
+ {
+ "name": "RoomService",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "FoodCourt",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "ShoppingMall",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "Spa",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "VRDeck",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "Name",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Transported",
+ "rawType": "bool",
+ "type": "boolean"
+ }
+ ],
+ "ref": "b895d310-201e-45f5-a169-0f6e69208be9",
+ "rows": [
+ [
+ "0",
+ "0001_01",
+ "Europa",
+ "False",
+ "B/0/P",
+ "TRAPPIST-1e",
+ "39.0",
+ "False",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "0.0",
+ "Maham Ofracculy",
+ "False"
+ ],
+ [
+ "1",
+ "0002_01",
+ "Earth",
+ "False",
+ "F/0/S",
+ "TRAPPIST-1e",
+ "24.0",
+ "False",
+ "109.0",
+ "9.0",
+ "25.0",
+ "549.0",
+ "44.0",
+ "Juanna Vines",
+ "True"
+ ],
+ [
+ "2",
+ "0003_01",
+ "Europa",
+ "False",
+ "A/0/S",
+ "TRAPPIST-1e",
+ "58.0",
+ "True",
+ "43.0",
+ "3576.0",
+ "0.0",
+ "6715.0",
+ "49.0",
+ "Altark Susent",
+ "False"
+ ],
+ [
+ "3",
+ "0003_02",
+ "Europa",
+ "False",
+ "A/0/S",
+ "TRAPPIST-1e",
+ "33.0",
+ "False",
+ "0.0",
+ "1283.0",
+ "371.0",
+ "3329.0",
+ "193.0",
+ "Solam Susent",
+ "False"
+ ],
+ [
+ "4",
+ "0004_01",
+ "Earth",
+ "False",
+ "F/1/S",
+ "TRAPPIST-1e",
+ "16.0",
+ "False",
+ "303.0",
+ "70.0",
+ "151.0",
+ "565.0",
+ "2.0",
+ "Willy Santantines",
+ "True"
+ ]
+ ],
+ "shape": {
+ "columns": 14,
+ "rows": 5
+ }
+ },
"text/html": [
"
\n",
"\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " HomePlanet | \n",
+ " CryoSleep | \n",
+ " Destination | \n",
+ " Age | \n",
+ " VIP | \n",
+ " RoomService | \n",
+ " FoodCourt | \n",
+ " ShoppingMall | \n",
+ " Spa | \n",
+ " VRDeck | \n",
+ " Transported | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 39.0 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 24.0 | \n",
+ " 0 | \n",
+ " 109.0 | \n",
+ " 9.0 | \n",
+ " 25.0 | \n",
+ " 549.0 | \n",
+ " 44.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 58.0 | \n",
+ " 1 | \n",
+ " 43.0 | \n",
+ " 3576.0 | \n",
+ " 0.0 | \n",
+ " 6715.0 | \n",
+ " 49.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 33.0 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 1283.0 | \n",
+ " 371.0 | \n",
+ " 3329.0 | \n",
+ " 193.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 16.0 | \n",
+ " 0 | \n",
+ " 303.0 | \n",
+ " 70.0 | \n",
+ " 151.0 | \n",
+ " 565.0 | \n",
+ " 2.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " HomePlanet CryoSleep Destination Age VIP RoomService FoodCourt \\\n",
+ "0 1 0 2 39.0 0 0.0 0.0 \n",
+ "1 0 0 2 24.0 0 109.0 9.0 \n",
+ "2 1 0 2 58.0 1 43.0 3576.0 \n",
+ "3 1 0 2 33.0 0 0.0 1283.0 \n",
+ "4 0 0 2 16.0 0 303.0 70.0 \n",
+ "\n",
+ " ShoppingMall Spa VRDeck Transported \n",
+ "0 0.0 0.0 0.0 0 \n",
+ "1 25.0 549.0 44.0 1 \n",
+ "2 0.0 6715.0 49.0 0 \n",
+ "3 371.0 3329.0 193.0 0 \n",
+ "4 151.0 565.0 2.0 1 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code here"
+ "# Preview the first rows of the spaceship DataFrame (head() shows five by default)\n",
+ "spaceship.head()"
]
},
{
@@ -253,11 +730,51 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 27,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: Random Forest | Success: 0.7786\n",
+ "Model: Gradient Boosting | Success: 0.7855\n",
+ "Model: AdaBoost | Success: 0.7614\n",
+ "Success (Accuracy) initial of the model is: 0.7855\n"
+ ]
+ }
+ ],
"source": [
- "#your code here"
+ "from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "\n",
+ "# Candidate ensemble classifiers; each one is seeded so the comparison is repeatable.\n",
+ "models = {\n",
+ "    \"Random Forest\": RandomForestClassifier(random_state=42),\n",
+ "    \"Gradient Boosting\": GradientBoostingClassifier(random_state=42),\n",
+ "    \"AdaBoost\": AdaBoostClassifier(random_state=42)\n",
+ "}\n",
+ "\n",
+ "# Fit every candidate on the training split and report its score on the test split.\n",
+ "for name, model in models.items():\n",
+ "    model.fit(X_train, y_train)\n",
+ "    score = model.score(X_test, y_test)\n",
+ "    print(f\"Model: {name} | Success: {score:.4f}\")\n",
+ "\n",
+ "# Baseline: Gradient Boosting with its default hyperparameters.\n",
+ "# The loop above already fitted this exact configuration (same seed), so reuse that\n",
+ "# fitted estimator instead of training an identical model a second time.\n",
+ "gb_model = models[\"Gradient Boosting\"]\n",
+ "\n",
+ "# Predict on the test split and compare the predictions with the true labels.\n",
+ "y_pred = gb_model.predict(X_test)\n",
+ "baseline_accuracy = accuracy_score(y_test, y_pred)\n",
+ "\n",
+ "print(f\"Success (Accuracy) initial of the model is: {baseline_accuracy:.4f}\")"
]
},
{
@@ -283,11 +800,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# Hyperparameter search space handed to GridSearchCV (3 x 3 x 3 x 2 = 54 combinations)\n",
+ "param_grid = dict(\n",
+ "    n_estimators=[100, 150, 200],\n",
+ "    learning_rate=[0.05, 0.1, 0.15],\n",
+ "    max_depth=[3, 4, 5],\n",
+ "    subsample=[0.8, 1.0],\n",
+ ")"
]
},
{
@@ -299,10 +822,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fitting 5 folds for each of 54 candidates, totalling 270 fits\n",
+ "Final search complete!\n",
+ "The best parameters are: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 150, 'subsample': 0.8}\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import GridSearchCV\n",
+ "\n",
+ "# Exhaustive search over param_grid with 5-fold cross-validation;\n",
+ "# n_jobs=-1 lets the fits run in parallel.\n",
+ "grid_search = GridSearchCV(\n",
+ "    estimator=gb_model,\n",
+ "    param_grid=param_grid,\n",
+ "    cv=5,\n",
+ "    n_jobs=-1,\n",
+ "    verbose=2,\n",
+ ")\n",
+ "\n",
+ "# Run the search on the training split (this can take a while).\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "# Keep the estimator selected by the search for the final evaluation.\n",
+ "best_model = grid_search.best_estimator_\n",
+ "\n",
+ "print(\"Final search complete!\")\n",
+ "print(\"The best parameters are:\", grid_search.best_params_)"
+ ]
},
{
"cell_type": "markdown",
@@ -313,15 +861,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Initial success (Baseline): 0.7855\n",
+ "Final success (Tuned): 0.7890\n",
+ "Total improvement: 0.35%\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Predict on the test split with the tuned model found by the grid search.\n",
+ "final_predictions = best_model.predict(X_test)\n",
+ "\n",
+ "# Accuracy of the tuned model against the true test labels.\n",
+ "final_accuracy = accuracy_score(y_test, final_predictions)\n",
+ "\n",
+ "# Compare with baseline_accuracy, the score computed for the untuned Gradient Boosting model.\n",
+ "# The improvement is the absolute accuracy difference, scaled by 100 (percentage points).\n",
+ "print(f\"Initial success (Baseline): {baseline_accuracy:.4f}\")\n",
+ "print(f\"Final success (Tuned): {final_accuracy:.4f}\")\n",
+ "print(f\"Total improvement: {(final_accuracy - baseline_accuracy)*100:.2f}%\")"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -335,7 +904,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.9"
+ "version": "3.12.10"
}
},
"nbformat": 4,