diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..d4be8f3 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -219,13 +219,40 @@ "- Feature Selection\n" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#separar variable objetivo\n", + "y = spaceship[\"Transported\"]\n", + "X = spaceship.drop(\"Transported\", axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#seleccionar variables numéricas \n", + "\n", + "num_cols = X.select_dtypes(include=[\"int64\", \"float64\"]).columns\n", + "\n" + ] + }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#aplicar standard scaler \n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler()\n", + "X[num_cols] = scaler.fit_transform(X[num_cols])" ] }, { @@ -237,11 +264,1688 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RoomService 0.200815\n", + "Spa 0.196902\n", + "VRDeck 0.176947\n", + "FoodCourt 0.158392\n", + "ShoppingMall 0.138611\n", + "Age 0.128334\n", + "dtype: float64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#random forest \n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "rf = RandomForestClassifier(random_state=42)\n", + "rf.fit(X[num_cols], y)\n", + "\n", + "importances = pd.Series(rf.feature_importances_, index=num_cols)\n", + "importances.sort_values(ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#dividir datos \n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, 
test_size=0.2, random_state=42\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "#separar columnas\n", + "cat_cols = X.select_dtypes(include=\"object\").columns\n", + "num_cols = X.select_dtypes(exclude=\"object\").columns" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('imputer',\n",
+ " SimpleImputer(strategy='median'))]),\n",
+ " Index(['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck'], dtype='object')),\n",
+ " ('cat',\n",
+ " Pipeline(steps=[('imputer',\n",
+ " SimpleImputer(strategy='most_frequent')),\n",
+ " ('onehot',\n",
+ " OneHotEncoder(handle_unknown='ignore'))]),\n",
+ " Index(['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP',\n",
+ " 'Name'],\n",
+ " dtype='object'))])),\n",
+ " ('classifier', RandomForestClassifier(random_state=42))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Index(['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck'], dtype='object')
Index(['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP',\n", + " 'Name'],\n", + " dtype='object')
GridSearchCV(cv=5,\n",
+ " estimator=Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('imputer',\n",
+ " SimpleImputer(strategy='median'))]),\n",
+ " Index(['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck'], dtype='object')),\n",
+ " ('cat',\n",
+ " Pipeline(steps=[('imputer',\n",
+ " SimpleImputer(strategy='most_frequent')),\n",
+ " ('onehot',\n",
+ " OneHotEncoder(ha...\n",
+ " Index(['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP',\n",
+ " 'Name'],\n",
+ " dtype='object'))])),\n",
+ " ('classifier',\n",
+ " RandomForestClassifier(random_state=42))]),\n",
+ " n_jobs=-1,\n",
+ " param_grid={'classifier__max_depth': [None, 10, 20],\n",
+ " 'classifier__max_features': ['sqrt', 'log2'],\n",
+ " 'classifier__min_samples_leaf': [1, 2],\n",
+ " 'classifier__min_samples_split': [2, 5],\n",
+ " 'classifier__n_estimators': [100, 200]},\n",
+ " scoring='accuracy', verbose=2)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Index(['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck'], dtype='object')
Index(['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP',\n", + " 'Name'],\n", + " dtype='object')