diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..43b3739 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -200,7 +200,7 @@ "4 True " ] }, - "execution_count": 2, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -225,7 +225,73 @@ "metadata": {}, "outputs": [], "source": [ - "#your code here" + "spaceship_clean = spaceship.dropna()\n", + "col_a_borrar = ['PassengerId', 'Name','Cabin']\n", + "spaceship_clean = spaceship_clean.drop(columns=col_a_borrar)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "spaceship_dumm = pd.get_dummies(spaceship_clean, drop_first=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "y = spaceship_dumm['Transported']\n", + "X = spaceship_dumm.drop(columns=['Transported'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tamaño X_train: (5284, 12)\n", + "Tamaño X_test: (1322, 12)\n" + ] + } + ], + "source": [ + "print(\"Tamaño X_train:\", X_train.shape)\n", + "print(\"Tamaño X_test:\", X_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "num_cols = ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "X_train[num_cols] = scaler.fit_transform(X_train[num_cols])\n", + "\n", + "X_test[num_cols] = scaler.transform(X_test[num_cols])" ] }, { @@ -237,11 +303,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import accuracy_score" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "rf_base = RandomForestClassifier(random_state=42)\n", + "rf_base.fit(X_train, y_train)\n", + "\n", + "y_pred_base = rf_base.predict(X_test)\n", + "\n" ] }, { @@ -253,11 +333,20 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy del Random Forest Base: 0.7958\n" + ] + } + ], "source": [ - "#your code here" + "accuracy_base = accuracy_score(y_test, y_pred_base)\n", + "print(f\"Accuracy del Random Forest Base: {accuracy_base:.4f}\")" ] }, { @@ -283,11 +372,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "from sklearn.model_selection import GridSearchCV" ] }, { @@ -302,7 +391,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "param_grid = {\n", + " 'n_estimators': [50, 100, 200], \n", + " 'max_depth': [5, 10, 20]\n", + "}\n", + "grid_search = GridSearchCV(estimator=rf_base, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)\n", + "\n" + ] }, { "cell_type": "markdown", @@ -313,15 +409,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 9 candidates, totalling 45 fits\n", + "Los mejores hiperparámetros encontrados fueron: {'max_depth': 10, 'n_estimators': 200}\n" + ] + } + ], + "source": [ + "grid_search.fit(X_train, y_train)\n", + "\n", + "\n", + "mejor_modelo = grid_search.best_estimator_\n", + "\n", + "print(f\"Los mejores hiperparámetros encontrados fueron: {grid_search.best_params_}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy del Random Forest con Grid: 0.8048\n" + ] + } + ], + "source": [ + "y_pred_tuneado = mejor_modelo.predict(X_test)\n", + "\n", + "accuracy_tuneado = accuracy_score(y_test, y_pred_tuneado)\n", + "print(f\"Accuracy del Random Forest con Grid: {accuracy_tuneado:.4f}\")" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -335,7 +467,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.13.5" } }, "nbformat": 4,