From a6182d755a357b6eea223e5c39eb75a8047ca03e Mon Sep 17 00:00:00 2001 From: carmamenriosrodriguez Date: Thu, 26 Feb 2026 12:41:34 +0100 Subject: [PATCH] Update lab-hyper-tuning.ipynb --- lab-hyper-tuning.ipynb | 3200 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 3155 insertions(+), 45 deletions(-) diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..5202862 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -40,9 +40,23 @@ "outputs": [], "source": [ "#Libraries\n", + "from sklearn.datasets import fetch_california_housing\n", "import pandas as pd\n", "import numpy as np\n", - "from sklearn.model_selection import train_test_split" + "\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.ensemble import BaggingRegressor, RandomForestRegressor,AdaBoostRegressor, GradientBoostingRegressor\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder\n", + "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, accuracy_score" ] }, { @@ -216,112 +230,3208 @@ "source": [ "Now perform the same as before:\n", "- Feature Scaling\n", - "- Feature Selection\n" + "- Feature Selection" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters." 
+ "#your code here\n", + "df = spaceship.drop(['PassengerId', 'Name', 'Cabin'], axis=1).dropna()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Evaluate your model" + "label = LabelEncoder()" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "categorical_cols = ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']\n", + "for col in categorical_cols:\n", + " df[col] = label.fit_transform(df[col].astype(str))" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 6, "metadata": {}, + "outputs": [], "source": [ - "**Grid/Random Search**" + "features = df.drop(columns = [\"Transported\"])\n", + "target = df[\"Transported\"]" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 7, "metadata": {}, + "outputs": [], "source": [ - "For this lab we will use Grid Search." + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.20, random_state=0)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 8, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
MinMaxScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], + "text/plain": [ + "MinMaxScaler()" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "- Define hyperparameters to fine tune." + "normalizer = MinMaxScaler()\n", + "normalizer.fit(X_train)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Run Grid Search" + "X_train_norm = normalizer.transform(X_train)\n", + "\n", + "X_test_norm = normalizer.transform(X_test)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "X_train_norm = pd.DataFrame(X_train_norm, columns = X_train.columns)\n", + "X_test_norm = pd.DataFrame(X_test_norm, columns = X_test.columns)" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "- Evaluate your model" + "- Now let's use the best model we got so far in order to see how it can improve when we fine tune its hyperparameters." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
RandomForestClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], + "text/plain": [ + "RandomForestClassifier()" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Baseline: Random Forest with default hyperparameters\n", + "rnd_clf = RandomForestClassifier()\n", + "rnd_clf.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Evaluate your model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "pred_rnd = rnd_clf.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy Random Forest: 0.784115523465704\n" + ] + } + ], + "source": [ + "print(f\"Accuracy Random Forest: {accuracy_score(y_test, pred_rnd)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "## Baseline accuracy is about 0.78" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Grid/Random Search**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this lab we will use Grid Search." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Define hyperparameters to fine tune."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Run Grid Search" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "grid = {\"n_estimators\": [50, 100, 200,500],\n", + " \"max_leaf_nodes\": [250, 500, 1000, None],\n", + " \"max_depth\":[10,30,50]}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "rnd_forest = RandomForestClassifier(random_state=42) " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "model = GridSearchCV(estimator = rnd_forest, param_grid = grid, cv=5)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),\n",
+       "             param_grid={'max_depth': [10, 30, 50],\n",
+       "                         'max_leaf_nodes': [250, 500, 1000, None],\n",
+       "                         'n_estimators': [50, 100, 200, 500]})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], + "text/plain": [ + "GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),\n", + " param_grid={'max_depth': [10, 30, 50],\n", + " 'max_leaf_nodes': [250, 500, 1000, None],\n", + " 'n_estimators': [50, 100, 200, 500]})" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(X_train_norm, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_depth': 10, 'max_leaf_nodes': 250, 'n_estimators': 200}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "best_model = model.best_estimator_" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAE 0.2072202166064982\n", + "Accuracy 0.7927797833935019\n" + ] + } + ], + "source": [ + "pred = model.predict(X_test_norm)\n", + "\n", + "print(\"MAE\", mean_absolute_error(y_test, pred))\n", + "print(\"Accuracy\", model.score(X_test_norm, y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mejores parámetros encontrados:\n", + "{'max_depth': 10, 'max_leaf_nodes': 250, 'n_estimators': 200}\n", + "\n", + "Mejor puntuación (accuracy/score) en validación: 0.7990\n" + ] + } + ], + "source": [ + "print(\"Mejores parámetros encontrados:\")\n", + "print(model.best_params_)\n", + "\n", + "print(f\"\\nMejor puntuación (accuracy/score) en validación: {model.best_score_:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Evaluate your model" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, +
"outputs": [], + "source": [ + "## Tuning the hyperparameters improved test accuracy from 0.784115523465704 (default Random Forest) to 0.7927797833935019 (grid-searched model); the default model was fit without a fixed random_state, so part of this small gap may be run-to-run variation" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "ds", "language": "python", "name": "python3" }, @@ -335,7 +3445,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.12.12" } }, "nbformat": 4,