diff --git a/.ipynb_checkpoints/Project_code-checkpoint.ipynb b/.ipynb_checkpoints/Project_code-checkpoint.ipynb index a02e94a..55d7a78 100644 --- a/.ipynb_checkpoints/Project_code-checkpoint.ipynb +++ b/.ipynb_checkpoints/Project_code-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 68, + "execution_count": 20, "id": "4cae7a4a-97dc-4eb0-9935-68eec9af8856", "metadata": {}, "outputs": [], @@ -24,12 +24,15 @@ "from sklearn.utils.class_weight import compute_sample_weight\n", "from imblearn.over_sampling import SMOTE\n", "from imblearn.pipeline import Pipeline\n", - "from sklearn.model_selection import RandomizedSearchCV\n", + "from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold\n", + "\n", "from scipy.stats import randint\n", "from sklearn.preprocessing import LabelBinarizer\n", "from sklearn.svm import SVC\n", "from sklearn.svm import LinearSVC\n", "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import classification_report, recall_score, f1_score, precision_score, confusion_matrix\n", "\n", "# Deep learning libraries (uncomment as needed)\n", "# import tensorflow as tf\n", @@ -46,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 15, "id": "fa1e894f-5998-4897-bba5-7d924a565efb", "metadata": { "scrolled": true @@ -90,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 16, "id": "8119eef1-ec06-4ed0-bd03-0010ec6c9310", "metadata": {}, "outputs": [ @@ -115,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 17, "id": "7a5c1300-531f-45f5-9788-41c17530f024", "metadata": { "scrolled": true @@ -184,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 18, "id": "78100d74-312e-4025-8227-86fd6c9a2239", "metadata": {}, "outputs": [], @@ -217,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 66, + 
"execution_count": 11, "id": "fe13ed78-f44b-4603-8f28-dabdd05570e2", "metadata": {}, "outputs": [], @@ -249,7 +252,9 @@ { "cell_type": "markdown", "id": "95fb8049-cc75-4f43-97e0-f6fbb0a853d2", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "

Simple MLP Classifier

" ] @@ -1424,7 +1429,9 @@ { "cell_type": "markdown", "id": "7c95fdc6-436b-432b-90a1-bf7def6f03c0", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "

Exploration

" ] @@ -2323,13 +2330,103 @@ { "cell_type": "markdown", "id": "905d931a-25c3-40d9-ba10-0891850c3146", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "

Optimization

" ] }, + { + "cell_type": "code", + "execution_count": 96, + "id": "528d074d-a34c-4e5b-b33d-ea2b5ec4ce03", + "metadata": {}, + "outputs": [], + "source": [ + "pipeline = Pipeline([\n", + " ('scaler', StandardScaler()),\n", + " ('smote', SMOTE(random_state=42)),\n", + " ('mlp', MLPClassifier(max_iter=30, random_state=42, early_stopping=True))\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "fd3c2b57-4e28-447f-8da1-0a58ca2b37af", + "metadata": {}, + "outputs": [], + "source": [ + "param_dist = {\n", + " # Note: 'smote__k_neighbors' is not tuned; SMOTE above is built without k_neighbors, so it uses the library default rather than 1\n", + " 'mlp__hidden_layer_sizes': [\n", + " (50, 25), \n", + " (100, 50), \n", + " (100, 100), \n", + " (150, 75, 25)\n", + " ],\n", + " 'mlp__activation': ['relu', 'tanh'],\n", + " 'mlp__alpha': uniform(0.0001, 0.01), # L2 regularization\n", + " 'mlp__learning_rate_init': uniform(0.001, 0.01)\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "22208e47-07e4-46cb-b826-ead175b40419", + "metadata": {}, + "outputs": [], + "source": [ + "random_search = RandomizedSearchCV(\n", + " pipeline,\n", + " param_distributions=param_dist,\n", + " n_iter=20, # Number of combinations to try\n", + " cv=3, # 3-fold cross-validation\n", + " scoring='f1_macro', # The metric to optimize\n", + " n_jobs=-1, # Use all available CPU cores\n", + " random_state=42,\n", + " verbose=1\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "5340493b-eb54-42b0-ad39-71a8279cd443", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'y_train_1d' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[99]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m 
random_search.fit(x_train, y_train_1d)\n", + "\u001b[31mNameError\u001b[39m: name 'y_train_1d' is not defined" + ] + } + ], + "source": [ + "random_search.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38c91b76-f7cf-4369-86db-7cd419254f66", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\nOptimization complete!\")\n", + "print(f\"Best F1-macro score: {random_search.best_score_:.4f}\")\n", + "print(\"Best parameters found:\")\n", + "print(random_search.best_params_)\n", + "\n", + "# Your optimized model (pipeline) is now ready\n", + "best_smote_mlp = random_search.best_estimator_" + ] + }, { "cell_type": "markdown", "id": "c710796f-208d-4f83-8d9d-1f61048e0463", @@ -2460,9 +2557,7 @@ { "cell_type": "markdown", "id": "a78ed681", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "

Downsampled MLP

" ] @@ -2470,16 +2565,14 @@ { "cell_type": "markdown", "id": "cda0e90c-4779-4881-976a-ea4703677b98", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "

Exploration

" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 30, "id": "cc5234fd", "metadata": {}, "outputs": [ @@ -3402,9 +3495,7 @@ { "cell_type": "markdown", "id": "a4fb1ade", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "

Forest search

" ] @@ -3412,23 +3503,21 @@ { "cell_type": "markdown", "id": "ccf6c663-6ee5-4fc5-852a-4eac62556855", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "

Exploration

" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 31, "id": "b18899be", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
RandomForestClassifier(class_weight='balanced', random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.