From 1f7eeae934ad660dba76c7dac3b2afe88e0363cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Jos=C3=A9=20Ruiz=20Bellido?= Date: Thu, 26 Feb 2026 12:00:03 +0100 Subject: [PATCH] Lab resolved; gitignore --- .gitignore | 1 + lab-hyper-tuning.ipynb | 598 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 582 insertions(+), 17 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ad6f926 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +lab-hyper-tuning_trad.ipynb diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..a18d5b8 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -221,11 +221,55 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/1x/604g3jcd6s54pz1_9_q0yygr0000gn/T/ipykernel_59692/2818035475.py:12: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " clean_spaceship[\"Cabin\"] = clean_spaceship[\"Cabin\"].apply(apply_cabin_map)\n" + ] + } + ], "source": [ - "#your code here" + "#your code here\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "clean_spaceship = spaceship.dropna()\n", + "\n", + "clean_spaceship.isna().sum()\n", + "\n", + "def apply_cabin_map(value):\n", + " return value[0]\n", + "\n", + "clean_spaceship[\"Cabin\"] = clean_spaceship[\"Cabin\"].apply(apply_cabin_map)\n", + "\n", + "clean_spaceship = clean_spaceship.drop(columns=[\"PassengerId\", \"Name\"])\n", + "\n", + "clean_spaceship = pd.get_dummies(clean_spaceship, columns=[\"HomePlanet\", \"Cabin\", \"Destination\"])\n", + "\n", + "y = clean_spaceship[\"Transported\"]\n", + "X = clean_spaceship.drop(columns=[\"Transported\"])\n", + "\n", + "#your code here\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.20, random_state=0\n", + ")\n", + "\n", + "scaler = MinMaxScaler()\n", + "scaler.fit(X_train)\n", + "\n", + "X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)\n", + "X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)" ] }, { @@ -237,11 +281,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#your code here\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "rf = RandomForestClassifier(\n", + " n_estimators=100,\n", + " random_state=0\n", + ")\n", + "\n", + "rf.fit(X_train, y_train)\n", + "\n", + "pred = rf.predict(X_test)" ] }, { @@ -253,11 +309,25 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.7957639939485628" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code here" + "#your code here\n", + "\n", + "accuracy_score(y_test, pred)\n", + "rf.score(X_test, y_test)" ] }, { @@ -283,11 +353,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#your code here\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "rf = RandomForestClassifier(random_state=0)\n", + "\n", + "param_grid = {\n", + " \"n_estimators\": [50, 100, 200, 500],\n", + " \"max_leaf_nodes\": [250, 500, 1000, None],\n", + " \"max_depth\": [10, 30, 50]\n", + "}\n" ] }, { @@ -299,10 +380,454 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=0),\n",
+       "             param_grid={'max_depth': [10, 30, 50],\n",
+       "                         'max_leaf_nodes': [250, 500, 1000, None],\n",
+       "                         'n_estimators': [50, 100, 200, 500]},\n",
+       "             scoring='accuracy')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=0),\n", + " param_grid={'max_depth': [10, 30, 50],\n", + " 'max_leaf_nodes': [250, 500, 1000, None],\n", + " 'n_estimators': [50, 100, 200, 500]},\n", + " scoring='accuracy')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "grid = GridSearchCV(rf, param_grid=param_grid, scoring=\"accuracy\", cv=5)\n", + "grid.fit(X_train, y_train)" + ] }, { "cell_type": "markdown", @@ -311,17 +836,56 @@ "- Evaluate your model" ] }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.float64(0.8048823141538375)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "grid.best_params_\n", + "grid.best_score_" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0.783661119515885" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "best_model = grid.best_estimator_\n", + "best_model.score(X_test, y_test)\n", + "\n", + "# El mejor score de GridSearch (CV) es ~0.80, pero en test baja a ~0.784,\n", + "# lo cual es normal porque CV evalúa en particiones de entrenamiento.\n", + "# El score en test es la métrica más realista del rendimiento final.\n" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -335,7 +899,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.13.9" } }, "nbformat": 4,