From 779d4d7de9874a92edd2f5277b8ae5f378e2fa30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Maria=20Monteiro?= <joaomariaveloso@gmail.com>
Date: Mon, 2 Mar 2026 08:20:52 +0000
Subject: [PATCH] Complete hyperparameter tuning lab (KNN baseline and grid search)

---
 lab-hyper-tuning.ipynb | 76 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 8 deletions(-)

diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb
index 847d487..45c0d2f 100644
--- a/lab-hyper-tuning.ipynb
+++ b/lab-hyper-tuning.ipynb
@@ -221,11 +221,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your code here"
+    "# Drop irrelevant columns\n",
+    "spaceship = spaceship.drop(columns=[\"PassengerId\", \"Name\", \"Cabin\"])\n",
+    "\n",
+    "# Target\n",
+    "y = spaceship[\"Transported\"]\n",
+    "X = spaceship.drop(columns=[\"Transported\"])\n",
+    "\n",
+    "# One-hot encoding for categorical variables\n",
+    "X = pd.get_dummies(X, drop_first=True)\n",
+    "\n",
+    "# Train-test split first, so no statistic below is computed from test rows\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=42\n",
+    ")\n",
+    "\n",
+    "# Handle missing values with medians learned from the training split only\n",
+    "train_medians = X_train.median(numeric_only=True)\n",
+    "X_train, X_test = X_train.fillna(train_medians), X_test.fillna(train_medians)\n",
+    "\n",
+    "# Feature scaling: fit on the training split, reuse the same transform on test\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "\n",
+    "scaler = StandardScaler()\n",
+    "X_train_scaled = scaler.fit_transform(X_train)\n",
+    "X_test_scaled = scaler.transform(X_test)"
    ]
   },
   {
@@ -241,7 +265,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your code here"
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "\n",
+    "knn = KNeighborsClassifier().fit(X_train_scaled, y_train)  # baseline: default hyperparameters\n",
+    "knn  # echo the fitted estimator as the cell output"
    ]
   },
   {
@@ -253,11 +280,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your code here"
+    "from sklearn.metrics import accuracy_score, classification_report\n",
+    "\n",
+    "# Score the baseline model on the held-out test split\n",
+    "y_pred = knn.predict(X_test_scaled)\n",
+    "\n",
+    "print(\"Accuracy:\", accuracy_score(y_test, y_pred))\n",
+    "print(\"\\nClassification Report:\\n\", classification_report(y_test, y_pred), sep=\"\\n\")"
    ]
   },
   {
@@ -287,7 +320,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your code here"
+    "from sklearn.model_selection import GridSearchCV\n",
+    "\n",
+    "param_grid = dict(  # KNN search space: 6 x 2 x 2 = 24 combinations\n",
+    "    n_neighbors=[3, 5, 7, 9, 11, 15],\n",
+    "    weights=[\"uniform\", \"distance\"],\n",
+    "    metric=[\"euclidean\", \"manhattan\"],\n",
+    ")"
    ]
   },
   {
@@ -302,7 +341,20 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# 5-fold cross-validated grid search over param_grid, ranked by accuracy\n",
+    "grid = GridSearchCV(\n",
+    "    estimator=KNeighborsClassifier(),\n",
+    "    param_grid=param_grid,\n",
+    "    scoring=\"accuracy\",\n",
+    "    cv=5,\n",
+    "    n_jobs=-1,\n",
+    ")\n",
+    "grid.fit(X_train_scaled, y_train)\n",
+    "\n",
+    "print(\"Best Parameters:\", grid.best_params_)\n",
+    "print(\"Best CV Score:\", grid.best_score_)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -316,7 +368,15 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# Score the estimator selected by the grid search on the held-out test split\n",
+    "best_knn = grid.best_estimator_\n",
+    "y_pred_best = best_knn.predict(X_test_scaled)\n",
+    "\n",
+    "print(\"Final Accuracy:\", accuracy_score(y_test, y_pred_best))\n",
+    "# sep=\"\\n\" keeps the blank line between the heading and the report\n",
+    "print(\"\\nClassification Report:\\n\", classification_report(y_test, y_pred_best), sep=\"\\n\")"
+   ]
   }
  ],
  "metadata": {