# %% [markdown]
# Cells recovered from the lab-hyper-tuning.ipynb hunk, as a percent-format
# script. `spaceship` must already be bound to the raw DataFrame by an earlier
# cell that is not part of this hunk.

# %%
# All imports used by the cells below, gathered in one place so that a fresh
# kernel can run the cells strictly top to bottom.
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (
    AdaBoostClassifier,
    BaggingClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)

from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error


# %%
def split_and_scale(features, target, test_size=0.20, random_state=0):
    """Split a feature frame into train/test parts and min-max scale both.

    The scaler is fitted on the training part only and then applied to both
    parts, so no information from the test rows reaches the scaler.

    Parameters
    ----------
    features : pandas.DataFrame
        Numeric feature columns.
    target : pandas.Series
        Labels aligned with ``features``.
    test_size : float, default 0.20
        Fraction of rows held out for testing (passed to ``train_test_split``).
    random_state : int, default 0
        Seed for the shuffle performed by ``train_test_split``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, normalizer, X_train_norm,
        X_test_norm)`` where ``normalizer`` is the fitted ``MinMaxScaler`` and
        the ``*_norm`` frames keep the column names of their unscaled
        counterparts.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    normalizer = MinMaxScaler()
    normalizer.fit(X_train)
    # transform() returns a bare ndarray; wrapping it restores the column
    # names. The new frames get a fresh 0..n-1 index, while y_train / y_test
    # keep the index of `target`.
    X_train_norm = pd.DataFrame(normalizer.transform(X_train), columns=X_train.columns)
    X_test_norm = pd.DataFrame(normalizer.transform(X_test), columns=X_test.columns)
    return X_train, X_test, y_train, y_test, normalizer, X_train_norm, X_test_norm


# %%
# Clean and encode the raw frame.
# NOTE: this cell rebinds `spaceship`, so it must run exactly once after the
# data-loading cell; a second run would fail because the dropped columns are
# already gone.
spaceship = spaceship.dropna().drop(columns=["PassengerId", "Name"])

# Keep only the first character of the cabin string before encoding it.
spaceship["Cabin"] = spaceship["Cabin"].str[0]

# One encoder per column, so every fitted mapping stays available afterwards.
label_encoders = {}
for column in ["HomePlanet", "CryoSleep", "Cabin", "Destination", "VIP"]:
    label_encoders[column] = LabelEncoder()
    spaceship[column] = label_encoders[column].fit_transform(spaceship[column])
# `le` used to be a single encoder refitted on each column, ending on "VIP";
# keep that binding for any later cell that still refers to it.
le = label_encoders["VIP"]

#your code here
features = spaceship.drop(columns=["Transported"])
target = spaceship["Transported"]

# The scaler below needs numeric input; fail early with a readable message.
non_numeric = features.select_dtypes(exclude="number").columns.tolist()
assert not non_numeric, f"non-numeric feature columns remain: {non_numeric}"

(X_train, X_test, y_train, y_test,
 normalizer, X_train_norm, X_test_norm) = split_and_scale(features, target)

# %%
# Same split and scaling on a reduced feature set. This deliberately rebinds
# X_train, X_test, y_train, y_test, normalizer and the *_norm frames, so the
# cells that follow work with the reduced features.
features2 = features.drop(columns=["Age", "VIP", "FoodCourt", "ShoppingMall"])
(X_train, X_test, y_train, y_test,
 normalizer, X_train_norm, X_test_norm) = split_and_scale(features2, target)

# %% [markdown]
# - Now let's use the best model we got so far in order to see how it can improve when we fine tune its hyperparameters.

# %%
# random_state pins the bootstrap and feature sampling so that scores are
# reproducible across runs, matching the seeded train/test split above.
forest = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    random_state=0,
)
RandomForestClassifier(max_depth=15)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5,\n",
+ " estimator=RandomForestClassifier(n_estimators=DecisionTreeClassifier()),\n",
+ " n_jobs=-1,\n",
+ " param_grid={'max_depth': [20, 30, 40, 50, 60],\n",
+ " 'max_leaf_nodes': [15, 30, 50, 60, 75],\n",
+ " 'n_estimators': [300, 400, 550, 750, 1000]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomForestClassifier(max_depth=50, max_leaf_nodes=75, n_estimators=550)
RandomForestClassifier(max_depth=50, max_leaf_nodes=75, n_estimators=550)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.