# %%
# Clean the passenger table and one-hot encode its categorical columns.
#
# The steps are written as one expression that returns a new frame instead of
# a series of `inplace=True` mutations, so a half-processed frame is never
# left bound to `spaceship` if one of the steps fails.
#
# NOTE: the result is rebound to `spaceship` because the cells below read that
# name. Re-running this cell on its own therefore raises a KeyError ('Cabin'
# has already been encoded away); re-run the cell that loads the data first.
spaceship = pd.get_dummies(
    spaceship
    .dropna()                                     # discard rows with any missing value
    .assign(Cabin=lambda df: df['Cabin'].str[0])  # keep only the first character of Cabin
    .drop(columns=['PassengerId', 'Name']),
    # A single call encodes all three columns. The indicator groups are
    # appended in list order, i.e. the same column order that three separate
    # get_dummies calls (Cabin, HomePlanet, Destination) produce.
    columns=['Cabin', 'HomePlanet', 'Destination'],
)
# %%
# All scikit-learn imports used by the modelling cells, gathered in one place.
# Every name that was imported before is still imported; the only line removed
# is the second, identical `sklearn.preprocessing` import.
from sklearn.ensemble import (
    AdaBoostClassifier,
    BaggingClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.metrics import (
    accuracy_score,
    mean_absolute_error,
    r2_score,
    root_mean_squared_error,
)
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.tree import DecisionTreeClassifier

# %%
# Turn every True/False column into 0/1 integers.
#
# This used to be `spaceship.replace({True: 1, False: 0})`, which depends on
# pandas silently downcasting the replaced columns back to a numeric dtype.
# pandas emits a FutureWarning for that ("Downcasting behavior in `replace` is
# deprecated"), and once the behaviour is removed the columns would stay
# `object`. The conversion below is explicit and warning-free:
#   1. infer_objects() gives object columns that hold only booleans a real
#      bool dtype (assumes such columns contain no other values, which holds
#      after the dropna() in the cleaning cell -- TODO confirm),
#   2. every bool column is then cast to int64.
spaceship = spaceship.infer_objects()
bool_columns = spaceship.select_dtypes(include='bool').columns
spaceship = spaceship.astype({column: 'int64' for column in bool_columns})

# %%
# Split the table into the model inputs and the label to predict.
features = spaceship.drop(columns=['Transported'])
target = spaceship['Transported']

# %%
# One seed shared by the split and the model so a fresh Restart & Run All
# reproduces the same numbers.
SEED = 0

# Hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=SEED
)

# %%
# Baseline gradient-boosting model, evaluated on the held-out rows.
# NOTE: despite the `_reg` suffix this is a classifier; the name is kept so
# that any other cell referring to `gb_reg` keeps working.
# `random_state` is set explicitly -- without it the fitted model, and hence
# the printed accuracy, can differ from run to run.
gb_reg = GradientBoostingClassifier(max_depth=15, n_estimators=100, random_state=SEED)
gb_reg.fit(X_train, y_train)

pred = gb_reg.predict(X_test)
print(f'Accuracy for GB: {accuracy_score(y_test, pred)}')

# %%
# Hyper-parameter candidates for the grid search: 2 x 2 = 4 combinations.
grid_2 = {
    'n_estimators': [100, 300],
    'max_depth': [3, 5],
}

# %%
# Exhaustive search over `grid_2` with 3-fold cross-validation, using all CPU
# cores (n_jobs=-1). `gb_reg` only acts as the template here: its `max_depth`
# and `n_estimators` are overridden by the grid for every candidate, while its
# other settings (including `random_state`) are reused.
model_2 = GridSearchCV(estimator=gb_reg, param_grid=grid_2, cv=3, n_jobs=-1)
# %%
# Run the grid search on the training split.
# The trailing semicolon keeps Jupyter from rendering the fitted estimator's
# HTML repr, which only bloats the saved notebook and shows nothing that the
# two cells below do not show more clearly.
model_2.fit(X_train, y_train);

# %%
# Parameter combination that the search ranked best.
model_2.best_params_

# %%
# The estimator configured with the best parameters, as exposed by the search
# object; this is the model evaluated on the test split below.
best_model_2 = model_2.best_estimator_
# %%
# Show the tuned model selected by the grid search.
best_model_2

# %%
# Score the tuned model on the held-out test split.
# The prediction is computed once (it used to be repeated in two consecutive
# cells), and `accuracy_score` is not imported again: the baseline cell above
# already uses it, so it is in scope whenever this cell can run.
# NOTE: `pred` overwrites the baseline model's predictions of the same name.
pred = best_model_2.predict(X_test)
print('Accuracy:', accuracy_score(y_test, pred))