diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb
index 847d487..e38b407 100644
--- a/lab-hyper-tuning.ipynb
+++ b/lab-hyper-tuning.ipynb
@@ -35,14 +35,34 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
- "#Libraries\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "from sklearn.model_selection import train_test_split"
+ "# Libraries\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "\n",
+ "from sklearn.datasets import fetch_california_housing\n",
+ "from sklearn.ensemble import (\n",
+ "    AdaBoostRegressor,\n",
+ "    BaggingRegressor,\n",
+ "    GradientBoostingRegressor,\n",
+ "    RandomForestRegressor,\n",
+ ")\n",
+ "from sklearn.metrics import (\n",
+ "    accuracy_score,\n",
+ "    classification_report,\n",
+ "    confusion_matrix,\n",
+ "    mean_absolute_error,\n",
+ "    mean_squared_error,\n",
+ "    r2_score,\n",
+ "    root_mean_squared_error,\n",
+ ")\n",
+ "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split\n",
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
+ "from sklearn.tree import DecisionTreeRegressor"
]
},
{
@@ -210,6 +230,333 @@
"spaceship.head()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Clean the data: drop every row that contains a missing value.\n",
+ "spaceship.dropna(inplace=True)\n",
+ "# reset_index() returns a new frame, so calling it without using the result\n",
+ "# did nothing. Apply it in place and discard the old labels (drop=True)\n",
+ "# so they are not added back as an extra 'index' column.\n",
+ "spaceship.reset_index(drop=True, inplace=True)\n",
+ "\n",
+ "# Keep only the part of Cabin before the first '/'.\n",
+ "spaceship[\"Cabin\"] = spaceship[\"Cabin\"].str.split(\"/\").str[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " HomePlanet | \n",
+ " CryoSleep | \n",
+ " Cabin | \n",
+ " Destination | \n",
+ " Age | \n",
+ " VIP | \n",
+ " RoomService | \n",
+ " FoodCourt | \n",
+ " ShoppingMall | \n",
+ " Spa | \n",
+ " VRDeck | \n",
+ " Transported | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Europa | \n",
+ " False | \n",
+ " B | \n",
+ " TRAPPIST-1e | \n",
+ " 39.0 | \n",
+ " False | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Earth | \n",
+ " False | \n",
+ " F | \n",
+ " TRAPPIST-1e | \n",
+ " 24.0 | \n",
+ " False | \n",
+ " 109.0 | \n",
+ " 9.0 | \n",
+ " 25.0 | \n",
+ " 549.0 | \n",
+ " 44.0 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Europa | \n",
+ " False | \n",
+ " A | \n",
+ " TRAPPIST-1e | \n",
+ " 58.0 | \n",
+ " True | \n",
+ " 43.0 | \n",
+ " 3576.0 | \n",
+ " 0.0 | \n",
+ " 6715.0 | \n",
+ " 49.0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " HomePlanet CryoSleep Cabin Destination Age VIP RoomService \\\n",
+ "0 Europa False B TRAPPIST-1e 39.0 False 0.0 \n",
+ "1 Earth False F TRAPPIST-1e 24.0 False 109.0 \n",
+ "2 Europa False A TRAPPIST-1e 58.0 True 43.0 \n",
+ "\n",
+ " FoodCourt ShoppingMall Spa VRDeck Transported \n",
+ "0 0.0 0.0 0.0 0.0 False \n",
+ "1 9.0 25.0 549.0 44.0 True \n",
+ "2 3576.0 0.0 6715.0 49.0 False "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Drop the columns that are not needed.\n",
+ "spaceship.drop(columns=[\"PassengerId\", \"Name\"], inplace=True)\n",
+ "spaceship.head(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# One-hot encode every object-typed column as 0/1 integer dummies,\n",
+ "# dropping the first level of each encoded column (drop_first=True).\n",
+ "categorical_cols = spaceship.select_dtypes(include=\"object\").columns\n",
+ "spaceship = pd.get_dummies(\n",
+ "    spaceship,\n",
+ "    columns=categorical_cols,\n",
+ "    drop_first=True,\n",
+ "    dtype=int,  # keyword argument of get_dummies, not an imported name\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Age float64\n",
+ "RoomService float64\n",
+ "FoodCourt float64\n",
+ "ShoppingMall float64\n",
+ "Spa float64\n",
+ "VRDeck float64\n",
+ "Transported bool\n",
+ "HomePlanet_Europa int64\n",
+ "HomePlanet_Mars int64\n",
+ "CryoSleep_True int64\n",
+ "Cabin_B int64\n",
+ "Cabin_C int64\n",
+ "Cabin_D int64\n",
+ "Cabin_E int64\n",
+ "Cabin_F int64\n",
+ "Cabin_G int64\n",
+ "Cabin_T int64\n",
+ "Destination_PSO J318.5-22 int64\n",
+ "Destination_TRAPPIST-1e int64\n",
+ "VIP_True int64\n",
+ "dtype: object"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Age | \n",
+ " RoomService | \n",
+ " FoodCourt | \n",
+ " ShoppingMall | \n",
+ " Spa | \n",
+ " VRDeck | \n",
+ " Transported | \n",
+ " HomePlanet_Europa | \n",
+ " HomePlanet_Mars | \n",
+ " CryoSleep_True | \n",
+ " Cabin_B | \n",
+ " Cabin_C | \n",
+ " Cabin_D | \n",
+ " Cabin_E | \n",
+ " Cabin_F | \n",
+ " Cabin_G | \n",
+ " Cabin_T | \n",
+ " Destination_PSO J318.5-22 | \n",
+ " Destination_TRAPPIST-1e | \n",
+ " VIP_True | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 39.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 24.0 | \n",
+ " 109.0 | \n",
+ " 9.0 | \n",
+ " 25.0 | \n",
+ " 549.0 | \n",
+ " 44.0 | \n",
+ " True | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 58.0 | \n",
+ " 43.0 | \n",
+ " 3576.0 | \n",
+ " 0.0 | \n",
+ " 6715.0 | \n",
+ " 49.0 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Age RoomService FoodCourt ShoppingMall Spa VRDeck Transported \\\n",
+ "0 39.0 0.0 0.0 0.0 0.0 0.0 False \n",
+ "1 24.0 109.0 9.0 25.0 549.0 44.0 True \n",
+ "2 58.0 43.0 3576.0 0.0 6715.0 49.0 False \n",
+ "\n",
+ " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n",
+ "0 1 0 0 1 0 \n",
+ "1 0 0 0 0 0 \n",
+ "2 1 0 0 0 0 \n",
+ "\n",
+ " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n",
+ "0 0 0 0 0 0 0 \n",
+ "1 0 0 1 0 0 0 \n",
+ "2 0 0 0 0 0 0 \n",
+ "\n",
+ " Destination_TRAPPIST-1e VIP_True \n",
+ "0 1 0 \n",
+ "1 1 0 \n",
+ "2 1 1 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Show the column dtypes, then the first three rows of the encoded frame.\n",
+ "display(spaceship.dtypes, spaceship.head(3))"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -221,11 +568,634 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Age | \n",
+ " RoomService | \n",
+ " FoodCourt | \n",
+ " ShoppingMall | \n",
+ " Spa | \n",
+ " VRDeck | \n",
+ " HomePlanet_Europa | \n",
+ " HomePlanet_Mars | \n",
+ " CryoSleep_True | \n",
+ " Cabin_B | \n",
+ " Cabin_C | \n",
+ " Cabin_D | \n",
+ " Cabin_E | \n",
+ " Cabin_F | \n",
+ " Cabin_G | \n",
+ " Cabin_T | \n",
+ " Destination_PSO J318.5-22 | \n",
+ " Destination_TRAPPIST-1e | \n",
+ " VIP_True | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 39.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 24.0 | \n",
+ " 109.0 | \n",
+ " 9.0 | \n",
+ " 25.0 | \n",
+ " 549.0 | \n",
+ " 44.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 58.0 | \n",
+ " 43.0 | \n",
+ " 3576.0 | \n",
+ " 0.0 | \n",
+ " 6715.0 | \n",
+ " 49.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Age RoomService FoodCourt ShoppingMall Spa VRDeck \\\n",
+ "0 39.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "1 24.0 109.0 9.0 25.0 549.0 44.0 \n",
+ "2 58.0 43.0 3576.0 0.0 6715.0 49.0 \n",
+ "\n",
+ " HomePlanet_Europa HomePlanet_Mars CryoSleep_True Cabin_B Cabin_C \\\n",
+ "0 1 0 0 1 0 \n",
+ "1 0 0 0 0 0 \n",
+ "2 1 0 0 0 0 \n",
+ "\n",
+ " Cabin_D Cabin_E Cabin_F Cabin_G Cabin_T Destination_PSO J318.5-22 \\\n",
+ "0 0 0 0 0 0 0 \n",
+ "1 0 0 1 0 0 0 \n",
+ "2 0 0 0 0 0 0 \n",
+ "\n",
+ " Destination_TRAPPIST-1e VIP_True \n",
+ "0 1 0 \n",
+ "1 1 0 \n",
+ "2 1 1 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "0 False\n",
+ "1 True\n",
+ "2 False\n",
+ "Name: Transported, dtype: bool"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Separate the label column from the predictor columns.\n",
+ "target = spaceship[\"Transported\"]\n",
+ "features = spaceship.drop(columns=\"Transported\")\n",
+ "\n",
+ "display(features.head(3), target.head(3))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# 80/20 train/test split with a fixed seed.\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    features, target, test_size=0.2, random_state=0\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fit the scaler on the training split only, then apply the same\n",
+ "# transformation to both splits.\n",
+ "scaler = StandardScaler().fit(X_train)\n",
+ "X_train_scaled = scaler.transform(X_train)\n",
+ "X_test_scaled = scaler.transform(X_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
+ ],
+ "text/plain": [
+ "KNeighborsClassifier()"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Train a 5-nearest-neighbours classifier on the scaled training data.\n",
+ "knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)\n",
+ "knn"
]
},
{
@@ -237,11 +1207,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.7586989409984871"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code here"
+ "# Accuracy of the fitted model on the held-out test split.\n",
+ "accuracy_score(y_test, knn.predict(X_test_scaled))"
]
},
{
@@ -283,11 +1264,55 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Best parameters: {'metric': 'minkowski', 'n_neighbors': 7, 'p': 2, 'weights': 'uniform'}\n",
+ "Best CV score: 0.7757362532610876\n",
+ "Test score: 0.773071104387292\n"
+ ]
+ }
+ ],
"source": [
- "#your code here"
+ "# Hyperparameter grid for KNN. Only the Minkowski metric is listed because\n",
+ "# p=1 is the Manhattan distance and p=2 the Euclidean distance; also listing\n",
+ "# 'euclidean' and 'manhattan' (which ignore p) only refits duplicate models.\n",
+ "grid_2 = {\n",
+ "    \"n_neighbors\": [3, 5, 7, 9, 11],\n",
+ "    \"weights\": [\"uniform\", \"distance\"],\n",
+ "    \"metric\": [\"minkowski\"],\n",
+ "    \"p\": [1, 2],  # 1 = Manhattan, 2 = Euclidean\n",
+ "}\n",
+ "\n",
+ "# Exhaustive search with 5-fold cross-validation, scored by accuracy.\n",
+ "# A fresh estimator is passed inline so that the name `knn` is not rebound\n",
+ "# to an unfitted model (which would break re-running cells that score `knn`).\n",
+ "grid_search = GridSearchCV(\n",
+ "    estimator=KNeighborsClassifier(),\n",
+ "    param_grid=grid_2,\n",
+ "    cv=5,\n",
+ "    scoring=\"accuracy\",\n",
+ "    n_jobs=-1,\n",
+ ")\n",
+ "\n",
+ "# Fit on the scaled training data.\n",
+ "grid_search.fit(X_train_scaled, y_train)\n",
+ "\n",
+ "# Report the winning parameter combination and its mean CV accuracy.\n",
+ "print(\"Best parameters:\", grid_search.best_params_)\n",
+ "print(\"Best CV score:\", grid_search.best_score_)\n",
+ "\n",
+ "# Evaluate the best estimator on the held-out test set.\n",
+ "best_knn = grid_search.best_estimator_\n",
+ "print(\"Test score:\", best_knn.score(X_test_scaled, y_test))"
]
},
{
@@ -321,7 +1346,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "data-analytics",
"language": "python",
"name": "python3"
},
@@ -335,7 +1360,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.9"
+ "version": "3.9.23"
}
},
"nbformat": 4,