diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb
index 847d487..bea1f5a 100644
--- a/lab-hyper-tuning.ipynb
+++ b/lab-hyper-tuning.ipynb
@@ -35,178 +35,31 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
- "from sklearn.model_selection import train_test_split"
+ "\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import accuracy_score, classification_report\n",
+ "from sklearn.model_selection import GridSearchCV\n"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " PassengerId | \n",
- " HomePlanet | \n",
- " CryoSleep | \n",
- " Cabin | \n",
- " Destination | \n",
- " Age | \n",
- " VIP | \n",
- " RoomService | \n",
- " FoodCourt | \n",
- " ShoppingMall | \n",
- " Spa | \n",
- " VRDeck | \n",
- " Name | \n",
- " Transported | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 0001_01 | \n",
- " Europa | \n",
- " False | \n",
- " B/0/P | \n",
- " TRAPPIST-1e | \n",
- " 39.0 | \n",
- " False | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " Maham Ofracculy | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 0002_01 | \n",
- " Earth | \n",
- " False | \n",
- " F/0/S | \n",
- " TRAPPIST-1e | \n",
- " 24.0 | \n",
- " False | \n",
- " 109.0 | \n",
- " 9.0 | \n",
- " 25.0 | \n",
- " 549.0 | \n",
- " 44.0 | \n",
- " Juanna Vines | \n",
- " True | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 0003_01 | \n",
- " Europa | \n",
- " False | \n",
- " A/0/S | \n",
- " TRAPPIST-1e | \n",
- " 58.0 | \n",
- " True | \n",
- " 43.0 | \n",
- " 3576.0 | \n",
- " 0.0 | \n",
- " 6715.0 | \n",
- " 49.0 | \n",
- " Altark Susent | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 0003_02 | \n",
- " Europa | \n",
- " False | \n",
- " A/0/S | \n",
- " TRAPPIST-1e | \n",
- " 33.0 | \n",
- " False | \n",
- " 0.0 | \n",
- " 1283.0 | \n",
- " 371.0 | \n",
- " 3329.0 | \n",
- " 193.0 | \n",
- " Solam Susent | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 0004_01 | \n",
- " Earth | \n",
- " False | \n",
- " F/1/S | \n",
- " TRAPPIST-1e | \n",
- " 16.0 | \n",
- " False | \n",
- " 303.0 | \n",
- " 70.0 | \n",
- " 151.0 | \n",
- " 565.0 | \n",
- " 2.0 | \n",
- " Willy Santantines | \n",
- " True | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " PassengerId HomePlanet CryoSleep Cabin Destination Age VIP \\\n",
- "0 0001_01 Europa False B/0/P TRAPPIST-1e 39.0 False \n",
- "1 0002_01 Earth False F/0/S TRAPPIST-1e 24.0 False \n",
- "2 0003_01 Europa False A/0/S TRAPPIST-1e 58.0 True \n",
- "3 0003_02 Europa False A/0/S TRAPPIST-1e 33.0 False \n",
- "4 0004_01 Earth False F/1/S TRAPPIST-1e 16.0 False \n",
- "\n",
- " RoomService FoodCourt ShoppingMall Spa VRDeck Name \\\n",
- "0 0.0 0.0 0.0 0.0 0.0 Maham Ofracculy \n",
- "1 109.0 9.0 25.0 549.0 44.0 Juanna Vines \n",
- "2 43.0 3576.0 0.0 6715.0 49.0 Altark Susent \n",
- "3 0.0 1283.0 371.0 3329.0 193.0 Solam Susent \n",
- "4 303.0 70.0 151.0 565.0 2.0 Willy Santantines \n",
- "\n",
- " Transported \n",
- "0 False \n",
- "1 True \n",
- "2 False \n",
- "3 False \n",
- "4 True "
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "spaceship = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\")\n",
+ "# Read the Spaceship Titanic CSV directly from the course data repository\n",
+ "spaceship = pd.read_csv(\n",
+ "    filepath_or_buffer=\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\"\n",
+ ")\n",
"spaceship.head()"
]
},
@@ -221,12 +74,10 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "#your code here"
- ]
+ "source": []
},
{
"cell_type": "markdown",
@@ -241,30 +92,152 @@
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# Baseline model: logistic regression on one-hot encoded, imputed and standardized features\n",
+ "X = spaceship.drop(\"Transported\", axis=1)\n",
+ "y = spaceship[\"Transported\"]\n",
+ "\n",
+ "# Drop identifier / free-text columns that are not used as features\n",
+ "X = X.drop([\"PassengerId\", \"Name\", \"Cabin\"], axis=1)\n",
+ "\n",
+ "# One-hot encoding\n",
+ "X = pd.get_dummies(X, drop_first=True)\n",
+ "\n",
+ "# Train / Test split\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.2, random_state=42\n",
+ ")\n",
+ "\n",
+ "# Fill missing values with medians learned on the training split: LogisticRegression raises on NaN input\n",
+ "train_medians = X_train.median(numeric_only=True)\n",
+ "\n",
+ "# Scaling (statistics are fitted on the training split only and reused for the test split)\n",
+ "scaler = StandardScaler()\n",
+ "X_train_scaled = scaler.fit_transform(X_train.fillna(train_medians))\n",
+ "X_test_scaled = scaler.transform(X_test.fillna(train_medians))\n",
+ "\n",
+ "log_reg = LogisticRegression(max_iter=1000)\n",
+ "log_reg.fit(X_train_scaled, y_train)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "- Evaluate your model"
+ "- Evaluate your model\n",
+ "```python\n",
+ "scaler = StandardScaler()\n",
+ "X_train_scaled = scaler.fit_transform(X_train)\n",
+ "X_test_scaled = scaler.transform(X_test)\n",
+ "```\n"
]
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 28,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy modelo optimizado: 0.7768832662449684\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " False 0.79 0.74 0.77 861\n",
+ " True 0.76 0.81 0.79 878\n",
+ "\n",
+ " accuracy 0.78 1739\n",
+ " macro avg 0.78 0.78 0.78 1739\n",
+ "weighted avg 0.78 0.78 0.78 1739\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
- "#your code here"
+ "# Self-contained workflow: load -> encode -> split -> pipeline -> grid search -> evaluate\n",
+ "# Every name assigned here (X, y, X_train, X_test, ...) overwrites the one from earlier cells.\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import accuracy_score, classification_report\n",
+ "from sklearn.model_selection import GridSearchCV, train_test_split\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "# Step 1: load the data\n",
+ "# The CSV is downloaded again here, which makes this cell independent of the cells above.\n",
+ "spaceship = pd.read_csv(\n",
+ "    filepath_or_buffer=\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\"\n",
+ ")\n",
+ "\n",
+ "# Step 2: target vector\n",
+ "# Transported is the column the model learns to predict.\n",
+ "y = spaceship[\"Transported\"]\n",
+ "\n",
+ "# Steps 3 and 4: feature matrix\n",
+ "# Drop the target and the columns treated as irrelevant in a single call, then\n",
+ "# one-hot encode the categorical columns that remain.\n",
+ "X = pd.get_dummies(\n",
+ "    spaceship.drop(columns=[\"Transported\", \"PassengerId\", \"Name\", \"Cabin\"]),\n",
+ "    drop_first=True,\n",
+ ")\n",
+ "\n",
+ "# Step 5: train / test split\n",
+ "# test_size=0.2 holds out 20% of the rows; random_state=42 makes the split repeatable.\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X,\n",
+ "    y,\n",
+ "    test_size=0.2,\n",
+ "    random_state=42,\n",
+ ")\n",
+ "\n",
+ "# Step 6: pipeline\n",
+ "# Imputation and scaling are pipeline steps, so GridSearchCV re-fits them on the\n",
+ "# training part of every cross-validation fold.\n",
+ "pipeline = Pipeline(\n",
+ "    steps=[\n",
+ "        (\"imputer\", SimpleImputer(strategy=\"median\")),\n",
+ "        (\"scaler\", StandardScaler()),\n",
+ "        (\"model\", LogisticRegression(max_iter=1000)),\n",
+ "    ]\n",
+ ")\n",
+ "\n",
+ "# Step 7: grid search\n",
+ "# Only the regularization strength C of the logistic regression is tuned;\n",
+ "# cv=5 scores each candidate with 5-fold cross-validation.\n",
+ "param_grid = dict(model__C=[0.01, 0.1, 1, 10])\n",
+ "\n",
+ "grid_search = GridSearchCV(\n",
+ "    estimator=pipeline,\n",
+ "    param_grid=param_grid,\n",
+ "    scoring=\"accuracy\",\n",
+ "    cv=5,\n",
+ "    n_jobs=-1,\n",
+ ")\n",
+ "\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "# Step 8: final evaluation\n",
+ "# best_estimator_ is the full pipeline refit with the best C, so it is given\n",
+ "# the raw (unscaled) X_test.\n",
+ "best_model = grid_search.best_estimator_\n",
+ "y_pred = best_model.predict(X_test)\n",
+ "\n",
+ "print(\"Accuracy modelo optimizado:\", accuracy_score(y_test, y_pred))\n",
+ "print(classification_report(y_test, y_pred))\n",
+ "\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "**Grid/Random Search**"
+ "**Grid/Random Search**\n",
+ "\n",
+ "\n"
]
},
{
@@ -278,7 +251,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "- Define hyperparameters to fine tune."
+ "- Define hyperparameters to fine tune.\n",
+ "\n"
]
},
{
@@ -287,7 +261,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "#your code here\n",
+ "\n"
]
},
{
@@ -299,10 +274,89 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy modelo optimizado: 0.7768832662449684\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " False 0.79 0.74 0.77 861\n",
+ " True 0.76 0.81 0.79 878\n",
+ "\n",
+ " accuracy 0.78 1739\n",
+ " macro avg 0.78 0.78 0.78 1739\n",
+ "weighted avg 0.78 0.78 0.78 1739\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import accuracy_score, classification_report\n",
+ "from sklearn.model_selection import GridSearchCV, train_test_split\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "# 1. Load the data\n",
+ "spaceship = pd.read_csv(\n",
+ "    filepath_or_buffer=\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\"\n",
+ ")\n",
+ "\n",
+ "# 2. Target vector\n",
+ "y = spaceship[\"Transported\"]\n",
+ "\n",
+ "# 3 + 4. Features: drop the target and the irrelevant columns, then one-hot encode\n",
+ "X = pd.get_dummies(\n",
+ "    spaceship.drop(columns=[\"Transported\", \"PassengerId\", \"Name\", \"Cabin\"]),\n",
+ "    drop_first=True,\n",
+ ")\n",
+ "\n",
+ "# 5. Train / test split (20% held out, fixed seed)\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X,\n",
+ "    y,\n",
+ "    test_size=0.2,\n",
+ "    random_state=42,\n",
+ ")\n",
+ "\n",
+ "# 6. Pipeline: median imputation -> standardization -> logistic regression\n",
+ "pipeline = Pipeline(\n",
+ "    steps=[\n",
+ "        (\"imputer\", SimpleImputer(strategy=\"median\")),\n",
+ "        (\"scaler\", StandardScaler()),\n",
+ "        (\"model\", LogisticRegression(max_iter=1000)),\n",
+ "    ]\n",
+ ")\n",
+ "\n",
+ "# 7. Grid search over the regularization strength C, scored by 5-fold CV accuracy\n",
+ "param_grid = dict(model__C=[0.01, 0.1, 1, 10])\n",
+ "\n",
+ "grid_search = GridSearchCV(\n",
+ "    estimator=pipeline,\n",
+ "    param_grid=param_grid,\n",
+ "    scoring=\"accuracy\",\n",
+ "    cv=5,\n",
+ "    n_jobs=-1,\n",
+ ")\n",
+ "\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "# 8. Final evaluation: the best pipeline predicts from the raw (unscaled) X_test\n",
+ "best_model = grid_search.best_estimator_\n",
+ "y_pred = best_model.predict(X_test)\n",
+ "\n",
+ "print(\"Accuracy modelo optimizado:\", accuracy_score(y_test, y_pred))\n",
+ "print(classification_report(y_test, y_pred))\n",
+ "\n"
+ ]
},
{
"cell_type": "markdown",
@@ -313,10 +367,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Tuned Accuracy: 0.6716503737780334\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " False 0.66 0.69 0.68 861\n",
+ " True 0.68 0.65 0.67 878\n",
+ "\n",
+ " accuracy 0.67 1739\n",
+ " macro avg 0.67 0.67 0.67 1739\n",
+ "weighted avg 0.67 0.67 0.67 1739\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\deysi.galvez\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\sklearn\\utils\\validation.py:2691: UserWarning: X does not have valid feature names, but SimpleImputer was fitted with feature names\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "best_model = grid_search.best_estimator_\n",
+ "# best_model is the whole pipeline (imputer -> scaler -> model): it must receive the raw X_test.\n",
+ "# Passing X_test_scaled would standardize the data a second time and distort the predictions.\n",
+ "y_pred_best = best_model.predict(X_test)\n",
+ "print(\"Tuned Accuracy:\", accuracy_score(y_test, y_pred_best))\n",
+ "print(classification_report(y_test, y_pred_best))\n"
+ ]
}
],
"metadata": {
@@ -335,7 +421,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.9"
+ "version": "3.12.9"
}
},
"nbformat": 4,