From 9a77383077a9a86a126bd37a9910f23311a65f68 Mon Sep 17 00:00:00 2001 From: Ross Wilson Date: Thu, 26 Feb 2026 11:36:56 +0000 Subject: [PATCH] completed hyperparameter tuning lab --- lab-hyper-tuning.ipynb | 545 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 523 insertions(+), 22 deletions(-) diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..208b991 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -35,11 +35,11 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "#Libraries\n", + "#Adding libraries\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split" @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -200,7 +200,7 @@ "4 True " ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -221,11 +221,28 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#doing the same feature eng as before\n", + "spaceship.dropna(inplace=True)\n", + "\n", + "spaceship['Cabin'] = spaceship['Cabin'].apply(lambda x: x.split('/')[0])\n", + "spaceship.drop(columns=['PassengerId','Name'], inplace=True)\n", + "\n", + "spaceship = pd.get_dummies(spaceship, drop_first=True)\n", + "\n", + "# split\n", + "X = spaceship.drop(columns='Transported')\n", + "y = spaceship['Transported']\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# now we scale the features this time\n", + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "X_train = scaler.fit_transform(X_train)\n", + "X_test = scaler.transform(X_test)" ] }, { @@ -237,11 +254,444 @@ }, { "cell_type": "code", - "execution_count": null, + 
"execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsClassifier()" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code here" + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "# knn was our best model so let's use that\n", + "knn = KNeighborsClassifier()\n", + "knn.fit(X_train, y_train)" ] }, { @@ -253,11 +703,22 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train acc: 0.825889477668433\n", + "test acc: 0.7874432677760969\n" + ] + } + ], "source": [ - "#your code here" + "print(f\"train acc: {knn.score(X_train, y_train)}\")\n", + "print(f\"test acc: {knn.score(X_test, y_test)}\")\n", + "# ok so this is the baseline before tuning" ] }, { @@ -283,11 +744,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "# params to try\n", + "param_grid = {\n", + " 'n_neighbors': [3, 5, 7, 9, 11, 13, 15],\n", + " 'weights': ['uniform', 'distance'],\n", + " 'metric': ['euclidean', 'manhattan']\n", + "}" ] }, { @@ -299,10 +767,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best params: {'metric': 'manhattan', 'n_neighbors': 15, 'weights': 'uniform'}\n", + "best cv score: 0.7746038315988647\n" + ] + } + ], + "source": [ + "grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')\n", + "grid_search.fit(X_train, y_train)\n", + "\n", + "print(f\"best params: {grid_search.best_params_}\")\n", + "print(f\"best cv score: {grid_search.best_score_}\")" + ] }, { "cell_type": "markdown", @@ -313,15 +796,33 @@ }, { "cell_type": 
"code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train acc: 0.8067751703255109\n", + "test acc: 0.7965204236006052\n" + ] + } + ], + "source": [ + "#using the best model from gridsearch\n", + "best_knn = grid_search.best_estimator_\n", + "\n", + "print(f\"train acc: {best_knn.score(X_train, y_train)}\")\n", + "print(f\"test acc: {best_knn.score(X_test, y_test)}\")\n", + "\n", + "#should be better than what the default knn had before\n", + "# the gridsearch tried all the combinations and picked the best one" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -335,7 +836,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.13.5" } }, "nbformat": 4,