diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..450b515 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -200,7 +200,7 @@ "4 True " ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -221,11 +221,74 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#your code here\n", + "spaceship.dropna(inplace=True)\n", + "spaceship['Cabin'] = spaceship['Cabin'].str[0]\n", + "spaceship.drop(columns=['PassengerId','Name'],inplace=True)\n", + "spaceship = pd.get_dummies(spaceship, columns=['Cabin'])\n", + "spaceship = pd.get_dummies(spaceship, columns=['HomePlanet'])\n", + "spaceship = pd.get_dummies(spaceship, columns=['Destination'])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/z1/hpkmqfl54717zbjq651n7__40000gn/T/ipykernel_47605/3921576397.py:1: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " spaceship = spaceship.replace({True: 1, False: 0})\n" + ] + } + ], + "source": [ + "spaceship = spaceship.replace({True: 1, False: 0})" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", + "\n", + "\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "# New in here:\n", + "from sklearn.ensemble import BaggingClassifier, RandomForestClassifier,AdaBoostClassifier, GradientBoostingClassifier\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", + "from sklearn.metrics import accuracy_score" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "features = spaceship.drop(columns = [\"Transported\"])\n", + "target = spaceship['Transported']" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "X_train , X_test, y_train, y_test = train_test_split(features, target, test_size = 0.2, random_state = 0 )" ] }, { @@ -237,11 +300,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy for GB: 0.7881996974281392\n" + ] + } + ], "source": [ - "#your code here" + "#your code here\n", + "\n", + "gb_reg = GradientBoostingClassifier(max_depth=15,n_estimators=100)\n", + "\n", + "gb_reg.fit(X_train, y_train)\n", + "\n", + "pred = gb_reg.predict(X_test)\n", + "\n", + "print(f'Accuracy for GB: {accuracy_score(y_test, pred)}')" ] }, { @@ -283,11 +362,953 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import RandomizedSearchCV" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "#your code here\n", + "\n", + "grid_2 = {\"n_estimators\": [100, 300],\n", + " \"max_depth\":[3, 5]}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "model_2 = GridSearchCV(estimator = gb_reg, param_grid = grid_2, cv = 3, n_jobs = -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=3, estimator=GradientBoostingClassifier(max_depth=15),\n",
+ " n_jobs=-1,\n",
+ " param_grid={'max_depth': [3, 5], 'n_estimators': [100, 300]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=3, estimator=GradientBoostingClassifier(max_depth=15),\n",
+ " n_jobs=-1,\n",
+ " param_grid={'max_depth': [3, 5], 'n_estimators': [100, 300]})GradientBoostingClassifier()
GradientBoostingClassifier()
GradientBoostingClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GradientBoostingClassifier()