From 50c825106053d556ce11fa0b646ab0cc5952bc7d Mon Sep 17 00:00:00 2001 From: Lautaro Date: Thu, 26 Feb 2026 11:48:32 +0100 Subject: [PATCH] Lab-HT-done --- lab-hyper-tuning.ipynb | 1009 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 991 insertions(+), 18 deletions(-) diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..465c7b1 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -200,14 +200,55 @@ "4 True " ] }, - "execution_count": 2, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "spaceship = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\")\n", - "spaceship.head()" + "spaceship.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8693, 14)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spaceship.shape " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#Importamos todas las librerías de scikit-learn que vamos a usar en el proyecto\n", + " #scikit-learn --\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import BaggingClassifier\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from sklearn.ensemble import AdaBoostClassifier\n", + "\n", + "#Eliminación de nulos y valores no 
numéricos\n", + "spaceship = spaceship.dropna()" ] }, { @@ -221,11 +262,18 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#Preprocesamiento de datos y escalado de características\n", + "spaceship = pd.get_dummies(spaceship, drop_first=True)\n", + "\n", + "X = spaceship.drop(columns=['Transported'])\n", + "y = spaceship['Transported']\n", + "\n", + "scaler = StandardScaler()\n", + "X_scaled = scaler.fit_transform(X)" ] }, { @@ -237,11 +285,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#Bagging fue el mejor modelo del proyecto anterior, con lo cual lo vamos a usar como base para el proyecto de hiperparámetros\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)\n", + "\n", + "bagging = BaggingClassifier(\n", + " estimator=DecisionTreeClassifier(),\n", + " n_estimators=10,\n", + " random_state=42,\n", + " n_jobs=1\n", + ")\n", + "bagging.fit(X_train, y_train)\n", + "acc_bag = accuracy_score(y_test, bagging.predict(X_test))" ] }, { @@ -253,11 +313,20 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy of Bagging Classifier with default hyperparameters: 0.8011\n" + ] + } + ], "source": [ - "#your code here" + "#Evaluamos el modelo con los hiperparámetros por defecto\n", + "print(f\"Accuracy of Bagging Classifier with default hyperparameters: {acc_bag:.4f}\") " ] }, { @@ -283,11 +352,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#Definición de los hiperparámetros a ajustar\n", + "param_grid = {\n", + " 
'n_estimators': [10],\n", + " 'max_samples': [1.0],\n", + " 'max_features': [1.0]\n", + "}" ] }, { @@ -299,10 +373,894 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "#Ejecutamos la búsqueda de hiperparámetros\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "grid_search = GridSearchCV(\n", + " estimator=bagging,\n", + " param_grid=param_grid,\n", + " scoring='accuracy',\n", + " cv=3,\n", + " n_jobs=-1\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=3,\n",
+       "             estimator=BaggingClassifier(estimator=DecisionTreeClassifier(),\n",
+       "                                         n_jobs=1, random_state=42),\n",
+       "             n_jobs=-1,\n",
+       "             param_grid={'max_features': [1.0], 'max_samples': [1.0],\n",
+       "                         'n_estimators': [10]},\n",
+       "             scoring='accuracy')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=3,\n", + " estimator=BaggingClassifier(estimator=DecisionTreeClassifier(),\n", + " n_jobs=1, random_state=42),\n", + " n_jobs=-1,\n", + " param_grid={'max_features': [1.0], 'max_samples': [1.0],\n", + " 'n_estimators': [10]},\n", + " scoring='accuracy')" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "grid_search.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 10}\n" + ] + } + ], + "source": [ + "print(grid_search.best_params_)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "best_model = grid_search.best_estimator_ " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best Model Accuracy: 0.8010590015128594\n" + ] + } + ], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "\n", + "y_pred_best = best_model.predict(X_test)\n", + "acc_best = accuracy_score(y_test, y_pred_best)\n", + "\n", + "print(\"Best Model Accuracy:\", acc_best)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Default Bagging: 0.8011\n", + "Tuned Bagging: 0.8010590015128594\n" + ] + } + ], + "source": [ + "acc_bag = 0.8011 # aprox\n", + "\n", + "print(\"Default Bagging:\", acc_bag)\n", + "print(\"Tuned Bagging:\", acc_best)" + ] }, { "cell_type": "markdown", @@ -316,12 +1274,27 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "After performing Grid Search hyperparameter tuning on the Bagging Classifier, the results show no significant improvement compared 
to the default model.\n", + "\n", + "The best parameters found were:\n", + "- n_estimators = 10\n", + "- max_samples = 1.0\n", + "- max_features = 1.0\n", + "\n", + "The tuned model achieved an accuracy of approximately 0.8011, which is essentially the same as the default model.\n", + "\n", + "This suggests that:\n", + "- The default hyperparameters may already be well-suited for this dataset, although the grid only contained one value per hyperparameter (the same values used by the baseline model), so no alternative configuration was actually compared.\n", + "- Further improvements may be limited by the data itself rather than the model configuration; a wider grid (for example, several values of n_estimators, max_samples and max_features) is needed to confirm this.\n", + "\n", + "Therefore, hyperparameter tuning did not provide additional performance gains in this case." + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -335,7 +1308,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.13.9" } }, "nbformat": 4,