data-bootcamp-v4 · manufranso · Feb 26, 2026
diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -47,7 +47,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -200,7 +200,7 @@
        "4         True  "
       ]
      },
-     "execution_count": 2,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -225,7 +225,73 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your code here"
+    "col_a_borrar = ['PassengerId', 'Name', 'Cabin']\n",
+    "# NOTE(review): dropna() runs before the drop, so rows whose only gaps are in col_a_borrar are discarded as well.\n",
+    "spaceship_clean = spaceship.dropna().drop(columns=col_a_borrar)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spaceship_dumm = pd.get_dummies(data=spaceship_clean, drop_first=True)  # drop_first omits one indicator column per encoded feature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = spaceship_dumm.drop(columns='Transported')  # features: every column except the target\n",
+    "y = spaceship_dumm['Transported']  # target labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)  # 80/20 split, fixed seed\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Tamaño X_train: (5284, 12)\n",
+      "Tamaño X_test: (1322, 12)\n"
+     ]
+    }
+   ],
+   "source": [
+    "for etiqueta, particion in ((\"Tamaño X_train:\", X_train), (\"Tamaño X_test:\", X_test)):  # label/frame pairs\n",
+    "    print(etiqueta, particion.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.preprocessing import StandardScaler\n",
+    "\n",
+    "# Columns to standardise; every other column of X_train / X_test is left untouched.\n",
+    "num_cols = ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']\n",
+    "\n",
+    "# Fit the scaler on the training split only, then apply the same transform to both splits.\n",
+    "scaler = StandardScaler().fit(X_train[num_cols])\n",
+    "X_train[num_cols] = scaler.transform(X_train[num_cols])\n",
+    "X_test[num_cols] = scaler.transform(X_test[num_cols])"
    ]
   },
   {
@@ -237,11 +303,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your code here"
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.metrics import accuracy_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Baseline: random forest with default hyperparameters and a fixed seed; fit() returns the fitted estimator.\n",
+    "rf_base = RandomForestClassifier(random_state=42).fit(X_train, y_train)\n",
+    "\n",
+    "# Baseline predictions on the held-out test split.\n",
+    "y_pred_base = rf_base.predict(X_test)\n"
    ]
   },
   {
@@ -253,11 +333,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 21,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy del Random Forest Base: 0.7958\n"
+     ]
+    }
+   ],
    "source": [
-    "#your code here"
+    "accuracy_base = accuracy_score(y_true=y_test, y_pred=y_pred_base)  # fraction of test rows predicted correctly\n",
+    "print(f\"Accuracy del Random Forest Base: {accuracy_base:.4f}\")"
    ]
   },
   {
@@ -283,11 +372,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your code here"
+    "from sklearn.model_selection import GridSearchCV"
    ]
   },
   {
@@ -302,7 +391,14 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# Search space: 3 x 3 = 9 hyperparameter combinations, each evaluated with 5-fold cross-validation.\n",
+    "param_grid = {\n",
+    "    'n_estimators': [50, 100, 200],\n",
+    "    'max_depth': [5, 10, 20],\n",
+    "}\n",
+    "grid_search = GridSearchCV(rf_base, param_grid, cv=5, n_jobs=-1, verbose=2)\n"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -313,15 +409,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 31,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 5 folds for each of 9 candidates, totalling 45 fits\n",
+      "Los mejores hiperparámetros encontrados fueron: {'max_depth': 10, 'n_estimators': 200}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run the grid search on the training split only.\n",
+    "# fit() returns the search object itself, so best_estimator_ can be read from the same expression.\n",
+    "mejor_modelo = grid_search.fit(X_train, y_train).best_estimator_\n",
+    "\n",
+    "# Report the winning hyperparameter combination.\n",
+    "print(f\"Los mejores hiperparámetros encontrados fueron: {grid_search.best_params_}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy del Random Forest con Grid: 0.8048\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Score the tuned forest on the same held-out split that was used for the baseline.\n",
+    "y_pred_tuneado = mejor_modelo.predict(X_test)\n",
+    "accuracy_tuneado = accuracy_score(y_true=y_test, y_pred=y_pred_tuneado)\n",
+    "print(f\"Accuracy del Random Forest con Grid: {accuracy_tuneado:.4f}\")"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -335,7 +467,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,