From 779d4d7de9874a92edd2f5277b8ae5f378e2fa30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Maria=20Monteiro?= Date: Mon, 2 Mar 2026 08:20:52 +0000 Subject: [PATCH] hyperdone --- lab-hyper-tuning.ipynb | 76 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..45c0d2f 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -221,11 +221,35 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "# Drop irrelevant columns\n", + "spaceship = spaceship.drop(columns=[\"PassengerId\", \"Name\", \"Cabin\"])\n", + "\n", + "# Target\n", + "y = spaceship[\"Transported\"]\n", + "X = spaceship.drop(columns=[\"Transported\"])\n", + "\n", + "# One-hot encoding for categorical variables\n", + "X = pd.get_dummies(X, drop_first=True)\n", + "\n", + "# Handle missing values\n", + "X = X.fillna(X.median(numeric_only=True))\n", + "\n", + "# Train-test split\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42\n", + ")\n", + "\n", + "# Feature Scaling\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)" ] }, { @@ -241,7 +265,10 @@ "metadata": {}, "outputs": [], "source": [ - "#your code here" + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "knn = KNeighborsClassifier()\n", + "knn.fit(X_train_scaled, y_train)" ] }, { @@ -253,11 +280,17 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "from sklearn.metrics import accuracy_score, classification_report\n", + "\n", + "y_pred = knn.predict(X_test_scaled)\n", + "\n", + "print(\"Accuracy:\", accuracy_score(y_test, y_pred))\n", + "print(\"\\nClassification Report:\\n\")\n", + "print(classification_report(y_test, y_pred))" ] }, { @@ -287,7 +320,13 @@ "metadata": {}, "outputs": [], "source": [ - "#your code here" + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "param_grid = {\n", + " \"n_neighbors\": [3, 5, 7, 9, 11, 15],\n", + " \"weights\": [\"uniform\", \"distance\"],\n", + " \"metric\": [\"euclidean\", \"manhattan\"]\n", + "}" ] }, { @@ -302,7 +341,20 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "grid = GridSearchCV(\n", + " KNeighborsClassifier(),\n", + " param_grid,\n", + " cv=5,\n", + " scoring=\"accuracy\",\n", + " n_jobs=-1\n", + ")\n", + "\n", + "grid.fit(X_train_scaled, y_train)\n", + "\n", + "print(\"Best Parameters:\", grid.best_params_)\n", + "print(\"Best CV Score:\", grid.best_score_)" + ] }, { "cell_type": "markdown", @@ -316,7 +368,15 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "best_knn = grid.best_estimator_\n", + "\n", + "y_pred_best = best_knn.predict(X_test_scaled)\n", + "\n", + "print(\"Final Accuracy:\", accuracy_score(y_test, y_pred_best))\n", + "print(\"\\nClassification Report:\\n\")\n", + "print(classification_report(y_test, y_pred_best))" + ] } ], "metadata": {