diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..5202862 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -40,9 +40,23 @@ "outputs": [], "source": [ "#Libraries\n", + "from sklearn.datasets import fetch_california_housing\n", "import pandas as pd\n", "import numpy as np\n", - "from sklearn.model_selection import train_test_split" + "\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.ensemble import BaggingRegressor, RandomForestRegressor,AdaBoostRegressor, GradientBoostingRegressor\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder\n", + "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, accuracy_score" ] }, { @@ -216,112 +230,3208 @@ "source": [ "Now perform the same as before:\n", "- Feature Scaling\n", - "- Feature Selection\n" + "- Feature Selection" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters." + "#your code here\n", + "df = spaceship.drop(['PassengerId', 'Name', 'Cabin'], axis=1).dropna()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Evaluate your model" + "label = LabelEncoder()" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "categorical_cols = ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']\n", + "for col in categorical_cols:\n", + " df[col] = label.fit_transform(df[col].astype(str))" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 6, "metadata": {}, + "outputs": [], "source": [ - "**Grid/Random Search**" + "features = df.drop(columns = [\"Transported\"])\n", + "target = df[\"Transported\"]" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 7, "metadata": {}, + "outputs": [], "source": [ - "For this lab we will use Grid Search." + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.20, random_state=0)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 8, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
MinMaxScaler()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),\n",
+ " param_grid={'max_depth': [10, 30, 50],\n",
+ " 'max_leaf_nodes': [250, 500, 1000, None],\n",
+ " 'n_estimators': [50, 100, 200, 500]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomForestClassifier(max_depth=10, max_leaf_nodes=250, n_estimators=200,\n", + " random_state=42)