From 9b6d943ad07002612c0a7f1fc6ee1e1e27f047ec Mon Sep 17 00:00:00 2001 From: D V Date: Tue, 25 Nov 2025 21:42:13 +0100 Subject: [PATCH 01/12] eda_borrower done --- src/eda_borrower.py | 114 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 src/eda_borrower.py diff --git a/src/eda_borrower.py b/src/eda_borrower.py new file mode 100644 index 0000000..3318a34 --- /dev/null +++ b/src/eda_borrower.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +"""eda_borrower.py + +Automatically generated by Colab. + +Original file is located at + https://colab.research.google.com/drive/1Xvxr8HvPPx6PW8OzzI-Ki4Z4_UV3SCBZ +""" + +import pandas as pd +from typing import Dict, Any, List, Callable + +BORROWER_COLS = [ + "id", "member_id", + "emp_title", "emp_length", + "home_ownership", + "annual_inc", "annual_inc_joint", + "verification_status", "verification_status_joint", + "zip_code", "addr_state", + "purpose", "title", "desc", + "issue_d", "pymnt_plan", "policy_code", + "url", +] + +class BorrowerProfileEDA: + def __init__(self, df: pd.DataFrame, target_col: str = "loan_status"): + """ + Store the full DataFrame and the name of the target column. + """ + self.df = df + self.target_col = target_col + + def structure_summary(self) -> pd.DataFrame: + """ + Return a DataFrame with one row per column in BORROWER_COLS: + - column: column name + - dtype: pandas dtype + - n_missing: number of missing values + - missing_pct: percentage of missing values + - n_unique: number of unique values + - goal is to create a function that tells me all the information above about the columns in borrower_cols + - if I dont find any data return none + """ + + rows = [] + + for col in BORROWER_COLS: + if col not in self.df.columns: + rows.append({ + "column" : col, + "dtype" : None, + "n_missing" : None, + "missing_pct" : None, + "n_unique" : None + }) + else: + s = self.df[col] + + rows.append({ + "column" : col, + "dtype" : s.dtype, + "n_missing" : s.isna().sum(), + "missing_pct" : (s.isna().mean() *100), + "n_unique" : s.nunique(dropna=True) + }) + return pd.DataFrame(rows) + + def income_summary(self) -> pd.DataFrame: + """ + Return basic stats (count, mean, std, min, max, quartiles) + for: + - annual_inc + - annual_inc_joint + + Use df[["annual_inc", "annual_inc_joint"]].describe().T + or equivalent. + """ + + return self.df[["annual_inc", "annual_inc_joint"]].describe().T + + def categorical_freqs(self, max_levels: int = 10) -> Dict[str, pd.Series]: + """ + For important categorical borrower columns (e.g. home_ownership, + addr_state, purpose), return a dict: + + { + "home_ownership": Series of top levels, + "addr_state": Series of top levels, + ... + } + + Each Series should be the result of value_counts().head(max_levels). + """ + cat_cols = ['home_ownership', 'addr_state', 'purpose'] + + result = {} + + for col in cat_cols: + if col in self.df.columns: + result[col] = self.df[col].value_counts.head(max_levels) + else: + result[col] = None + + return result + + def default_rate_by_category(self, col: str) -> pd.Series: + """ + For a given categorical column (e.g. 'home_ownership' or 'purpose'), + compute the default rate per category. + + Default rate = mean of self.target_col for each category. + Return a pandas Series indexed by category, with values in [0, 1]. + """ + return self.df.groupby(col)[self.target_col].mean() \ No newline at end of file From 77f202ba6891ac08f84865b24a92a76c671d90d8 Mon Sep 17 00:00:00 2001 From: D V Date: Wed, 26 Nov 2025 15:49:39 +0100 Subject: [PATCH 02/12] eda demo --- notebooks/eda_borrower_demo.ipynb | 79 +++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 notebooks/eda_borrower_demo.ipynb diff --git a/notebooks/eda_borrower_demo.ipynb b/notebooks/eda_borrower_demo.ipynb new file mode 100644 index 0000000..192e255 --- /dev/null +++ b/notebooks/eda_borrower_demo.ipynb @@ -0,0 +1,79 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "knS_VAzEkWuW" + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "1. Load the dataset:\n", + "\n", + " import pandas as pd\n", + " from src.eda_borrower import BorrowerProfileEDA, run_borrower_eda_pipeline\n", + "\n", + " df = pd.read_csv(\"data/loan_sample.csv\") # or the correct path in your repo\n", + "\n", + "\n", + "2. Instantiate the EDA class:\n", + "\n", + " eda = BorrowerProfileEDA(df, target_col=\"loan_status\")\n", + "\n", + "\n", + "3. Run the pipeline and inspect the results:\n", + "\n", + " report = run_borrower_eda_pipeline(eda)\n", + "\n", + "\n", + "4. Display at least:\n", + "\n", + " report[\"structure\"] # table of borrower column structure\n", + " report[\"income\"] # income stats\n", + " report[\"freqs\"] # categorical frequencies\n", + " report[\"default_by_home_ownership\"] # default rate by home_ownership\n", + " report[\"default_by_purpose\"] # default rate by purpose\n", + "\n", + "\n", + "\n", + "You can add markdown cells explaining what each result means in plain language (e.g., class imbalance, missingness, etc.).\n", + "\n", + "Acceptance Criteria ✅\n", + "---------------------\n", + "\n", + "- `BorrowerProfileEDA`:\n", + "\n", + " - Initializes correctly with a DataFrame.\n", + " - `structure_summary()` returns a DataFrame with the requested columns/metrics.\n", + " - `income_summary()` returns a DataFrame with stats for `annual_inc` and `annual_inc_joint`.\n", + " - `categorical_freqs()` returns a dict of Series with top categories.\n", + " - `default_rate_by_category(col)` returns a Series of default rates per category.\n", + "- Functional pipeline:\n", + "\n", + " - `borrower_eda_steps(eda)` returns a dict of callables.\n", + " - `run_borrower_eda_pipeline(eda)` iterates over that dict, calls each function, and returns a dict of results.\n", + "- Notebook:\n", + "\n", + " - Runs top-to-bottom without errors.\n", + " - Shows the structure summary, income summary, categorical frequencies, and default-rate-by-category analysis.\n", + " - Contains only EDA (no model training).\n", + " \"\"\"" + ] + } + ] +} \ No newline at end of file From 6a53fd6b28a1f39e914bed08c72e4f5dd452856a Mon Sep 17 00:00:00 2001 From: D V Date: Wed, 26 Nov 2025 15:52:27 +0100 Subject: [PATCH 03/12] eda with pipeline completed --- .DS_Store | Bin 0 -> 8196 bytes src/.DS_Store | Bin 0 -> 6148 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 .DS_Store create mode 100644 src/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..a95fbfd24f88ddd81883ab3d66b7ef9b741dad95 GIT binary patch literal 8196 zcmeHMO>fgc5S?vG<4^&Spi(&?SuMvZq+bYxkkT|cR00kSf&-us*9NS*-jFyAsH&7Z z{09C4XZ{HPg%iBl-4xqRPeHVTU1@hVyYG3|Z)WXGLPVlI@K=cziO4}^Tb@TVq3}G{ zQ)R}CEWirziClV!xYwAFMH znoh#3EKG(X)au~53QnS?t!=FWR)NzBaPEGMZc~>Al;F($&fjB_7zt=;Cz5+!Bt0Pc zaBZPheyV~VAAO%XKz4GtuV>-BVZUpx_s#1@VLz!>zl)i|?4`M)Q*`E?SFLw)*h)Id zpkMFwUht=!LVJvu`T1I6UISL5kfxzaW^*Ck5}BI`)gJ7?`|9)mz;&=I}di-`_VxxKdDSdDGpWd04v1I!~PQbKuLCog2N@vcYq9l9NKE8h_}-o7>5S!*$38ccN`8(GQgiB;+4EVS=C>A{nWQ;7d5F0(MMcR#*;>3>x0E z#VRnJ0tHQ9S5Z-O0-BN@c6!aGGS}?7xAYMYOFJMFuDm5WRgE3p$)Er77cYPsW#OHBl zcOw?-Rm9G~?l-@?*$=Wmj4|%d;tpdrV@yCp0+viQC{o7C|27v#B3WuF<-XGL0%dj4qQ= zF|@Z&WtN9YHkzn{BpyM^%~g`cvhd|Bi>InK&;ijD&7r+FpLaWMzwPw;hl{o|KkmAq zAN3ZCrr6m(I5{6arcarC(Ts8+T*;Qf3SL3^T+nkc$x@l!gRjc3@(76mVt^PR2G*7V zb0mn?+A=^3CkBXtpBTXX!GVV887wubtphr|K4aWML;)S&5{QmL&tRz$A|PCs0_sw3 zo)}!0gWoZEp21S1E@xcL4C9!YtH%phvxDDJ>5O|CsV4@Afn^5j+O+ZfKZ9RpX7FdEH&aN S=vU={bP-U5P)7{>0s|jNhDu}r literal 0 HcmV?d00001 From 3213f32aba3942885d5ea9c4f0edd1c75f14f40c Mon Sep 17 00:00:00 2001 From: D V Date: Wed, 26 Nov 2025 16:03:57 +0100 Subject: [PATCH 04/12] eda with pipeline completed --- .DS_Store | Bin 8196 -> 8196 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/.DS_Store b/.DS_Store index a95fbfd24f88ddd81883ab3d66b7ef9b741dad95..2e2ab3a8d1aca188ed5c3414f62f303878a35b85 100644 GIT binary patch delta 16 XcmZp1XmQx^QDpKpA@|J|qGz}OJ}w5< delta 16 XcmZp1XmQx^QDpLe0r$-oqGz}OKWGO1 From a89d0f126ad247c3e3143579d4f961f495163297 Mon Sep 17 00:00:00 2001 From: D V Date: Wed, 26 Nov 2025 16:02:46 +0100 Subject: [PATCH 05/12] eda with pipeline completed --- src/eda_borrower.py | 76 +++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/src/eda_borrower.py b/src/eda_borrower.py index 3318a34..ed6d9b3 100644 --- a/src/eda_borrower.py +++ b/src/eda_borrower.py @@ -31,19 +31,9 @@ def __init__(self, df: pd.DataFrame, target_col: str = "loan_status"): self.target_col = target_col def structure_summary(self) -> pd.DataFrame: - """ - Return a DataFrame with one row per column in BORROWER_COLS: - - column: column name - - dtype: pandas dtype - - n_missing: number of missing values - - missing_pct: percentage of missing values - - n_unique: number of unique values - - goal is to create a function that tells me all the information above about the columns in borrower_cols - - if I dont find any data return none - """ - + # Create empty list rows = [] - + # Loop over BORROWER_COLS and if empty fields fill with dict for col in BORROWER_COLS: if col not in self.df.columns: rows.append({ @@ -54,8 +44,10 @@ def structure_summary(self) -> pd.DataFrame: "n_unique" : None }) else: + # create series s = self.df[col] + # fill series with asked information about data rows.append({ "column" : col, "dtype" : s.dtype, @@ -63,52 +55,48 @@ def structure_summary(self) -> pd.DataFrame: "missing_pct" : (s.isna().mean() *100), "n_unique" : s.nunique(dropna=True) }) + # return Dataframe with Information about the data return pd.DataFrame(rows) def income_summary(self) -> pd.DataFrame: - """ - Return basic stats (count, mean, std, min, max, quartiles) - for: - - annual_inc - - annual_inc_joint - - Use df[["annual_inc", "annual_inc_joint"]].describe().T - or equivalent. - """ - + #provide information about two explicit columns via describe().T return self.df[["annual_inc", "annual_inc_joint"]].describe().T - def categorical_freqs(self, max_levels: int = 10) -> Dict[str, pd.Series]: - """ - For important categorical borrower columns (e.g. home_ownership, - addr_state, purpose), return a dict: - - { - "home_ownership": Series of top levels, - "addr_state": Series of top levels, - ... - } - Each Series should be the result of value_counts().head(max_levels). - """ + def categorical_freqs(self, max_levels: int = 10) -> Dict[str, pd.Series]: + # create variable for list of columns cat_cols = ['home_ownership', 'addr_state', 'purpose'] - + # create empty dict to return info result = {} - + # loop over specified columns and write value_counts in dict result for col in cat_cols: if col in self.df.columns: result[col] = self.df[col].value_counts.head(max_levels) else: result[col] = None - + # return dict with result return result + def default_rate_by_category(self, col: str) -> pd.Series: - """ - For a given categorical column (e.g. 'home_ownership' or 'purpose'), - compute the default rate per category. + return self.df.groupby(col)[self.target_col].mean() - Default rate = mean of self.target_col for each category. - Return a pandas Series indexed by category, with values in [0, 1]. - """ - return self.df.groupby(col)[self.target_col].mean() \ No newline at end of file +def borrower_eda_steps(eda: BorrowerProfileEDA) -> Dict[str, Callable[[], Any]]: + + return { + "structure": eda.structure_summary, + "income": eda.income_summary, + "freqs": lambda: eda.categorical_freqs(max_levels=10), + "default_by_home_ownership": lambda: eda.default_rate_by_category("home_ownership"), + "default_by_purpose": lambda: eda.default_rate_by_category("purpose"), + } + + + def run_borrower_eda_pipeline(eda: BorrowerProfileEDA) -> Dict[str, Any]: + steps = borrower_eda_steps(eda) + results = {} + + for name, func in steps.items(): + results[name] = func() # call the stored function + + return results \ No newline at end of file From 44e54af4ac0694d008e23a30adc45ad84af43782 Mon Sep 17 00:00:00 2001 From: D V Date: Thu, 27 Nov 2025 15:22:16 +0100 Subject: [PATCH 06/12] cleaned out mistakes --- src/eda_borrower.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/eda_borrower.py b/src/eda_borrower.py index ed6d9b3..729678e 100644 --- a/src/eda_borrower.py +++ b/src/eda_borrower.py @@ -83,7 +83,7 @@ def default_rate_by_category(self, col: str) -> pd.Series: def borrower_eda_steps(eda: BorrowerProfileEDA) -> Dict[str, Callable[[], Any]]: - return { + return { "structure": eda.structure_summary, "income": eda.income_summary, "freqs": lambda: eda.categorical_freqs(max_levels=10), @@ -92,11 +92,11 @@ def borrower_eda_steps(eda: BorrowerProfileEDA) -> Dict[str, Callable[[], Any]]: } - def run_borrower_eda_pipeline(eda: BorrowerProfileEDA) -> Dict[str, Any]: - steps = borrower_eda_steps(eda) - results = {} +def run_borrower_eda_pipeline(eda: BorrowerProfileEDA) -> Dict[str, Any]: + steps = borrower_eda_steps(eda) + results = {} - for name, func in steps.items(): - results[name] = func() # call the stored function + for name, func in steps.items(): + results[name] = func() # call the stored function - return results \ No newline at end of file + return results \ No newline at end of file From 1549af3e1910d092c140bf72cdb978b39abd7e61 Mon Sep 17 00:00:00 2001 From: D V Date: Thu, 27 Nov 2025 15:49:11 +0100 Subject: [PATCH 07/12] cleaned out mistakes --- src/eda_borrower.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eda_borrower.py b/src/eda_borrower.py index 729678e..02ad710 100644 --- a/src/eda_borrower.py +++ b/src/eda_borrower.py @@ -71,7 +71,7 @@ def categorical_freqs(self, max_levels: int = 10) -> Dict[str, pd.Series]: # loop over specified columns and write value_counts in dict result for col in cat_cols: if col in self.df.columns: - result[col] = self.df[col].value_counts.head(max_levels) + result[col] = self.df[col].value_counts().head(max_levels) else: result[col] = None # return dict with result From 66783341255ac003f535ac6cfeb2ed7841fd9cbd Mon Sep 17 00:00:00 2001 From: D V Date: Thu, 27 Nov 2025 16:02:42 +0100 Subject: [PATCH 08/12] fixes --- src/.DS_Store | Bin 6148 -> 6148 bytes .../eda_borrower-checkpoint.py | 102 ++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 src/.ipynb_checkpoints/eda_borrower-checkpoint.py diff --git a/src/.DS_Store b/src/.DS_Store index 7b8876ba1c870614d6fff60b6497919012fc4792..5c7fced96d178afa572a26b342d8cc23773820a0 100644 GIT binary patch delta 28 jcmZoMXffC@kCETh(4baFq1w{eR7b(Yz pd.DataFrame: + # Create empty list + rows = [] + # Loop over BORROWER_COLS and if empty fields fill with dict + for col in BORROWER_COLS: + if col not in self.df.columns: + rows.append({ + "column" : col, + "dtype" : None, + "n_missing" : None, + "missing_pct" : None, + "n_unique" : None + }) + else: + # create series + s = self.df[col] + + # fill series with asked information about data + rows.append({ + "column" : col, + "dtype" : s.dtype, + "n_missing" : s.isna().sum(), + "missing_pct" : (s.isna().mean() *100), + "n_unique" : s.nunique(dropna=True) + }) + # return Dataframe with Information about the data + return pd.DataFrame(rows) + + def income_summary(self) -> pd.DataFrame: + #provide information about two explicit columns via describe().T + return self.df[["annual_inc", "annual_inc_joint"]].describe().T + + + def categorical_freqs(self, max_levels: int = 10) -> Dict[str, pd.Series]: + # create variable for list of columns + cat_cols = ['home_ownership', 'addr_state', 'purpose'] + # create empty dict to return info + result = {} + # loop over specified columns and write value_counts in dict result + for col in cat_cols: + if col in self.df.columns: + result[col] = self.df[col].value_counts().head(max_levels) + else: + result[col] = None + # return dict with result + return result + + + def default_rate_by_category(self, col: str) -> pd.Series: + return self.df.groupby(col)[self.target_col].mean() + +def borrower_eda_steps(eda: BorrowerProfileEDA) -> Dict[str, Callable[[], Any]]: + + return { + "structure": eda.structure_summary, + "income": eda.income_summary, + "freqs": lambda: eda.categorical_freqs(max_levels=10), + "default_by_home_ownership": lambda: eda.default_rate_by_category("home_ownership"), + "default_by_purpose": lambda: eda.default_rate_by_category("purpose"), + } + + +def run_borrower_eda_pipeline(eda: BorrowerProfileEDA) -> Dict[str, Any]: + steps = borrower_eda_steps(eda) + results = {} + + for name, func in steps.items(): + results[name] = func() # call the stored function + + return results \ No newline at end of file From b9070dae3172919605f81dc05eec9b2a0e5c0dbd Mon Sep 17 00:00:00 2001 From: D V Date: Thu, 27 Nov 2025 16:03:46 +0100 Subject: [PATCH 09/12] fixes --- .DS_Store | Bin 8196 -> 8196 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/.DS_Store b/.DS_Store index 2e2ab3a8d1aca188ed5c3414f62f303878a35b85..5b0ff84908dab68db8bcb2259114f817889004a5 100644 GIT binary patch delta 41 xcmZp1XmQxUBFJxQXi%%8P;F^!s-s|HU_SYkknv_W!PU%@{|mTpwh%qT1pxiI4EO*5 delta 25 hcmZp1XmQxUA~^Y*h|^{_!PU%@uL-$twh%qT1ps~o34H(n From b4e25bf878ed27886c53c666884de2b56dff5a54 Mon Sep 17 00:00:00 2001 From: D V Date: Fri, 28 Nov 2025 11:13:08 +0100 Subject: [PATCH 10/12] beta for testing --- notebooks/eda_borrower_demo.ipynb | 109 ++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 15 deletions(-) diff --git a/notebooks/eda_borrower_demo.ipynb b/notebooks/eda_borrower_demo.ipynb index 192e255..5106165 100644 --- a/notebooks/eda_borrower_demo.ipynb +++ b/notebooks/eda_borrower_demo.ipynb @@ -1,18 +1,4 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "code", @@ -74,6 +60,99 @@ " - Contains only EDA (no model training).\n", " \"\"\"" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EFc8nSn-gHZH", + "outputId": "94730639-f261-4b07-ada3-bf996dcbb600", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 349 + } + }, + "outputs": [ + { + "output_type": "error", + "ename": "ModuleNotFoundError", + "evalue": "No module named 'src'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipython-input-4123335800.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meda_borrower\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mBorrowerProfileEDA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_borrower_eda_pipeline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"loan.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src'", + "", + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" + ], + "errorDetails": { + "actions": [ + { + "action": "open_url", + "actionText": "Open Examples", + "url": "/notebooks/snippets/importing_libraries.ipynb" + } + ] + } + } + ], + "source": [ + "import pandas as pd\n", + "from src.eda_borrower import BorrowerProfileEDA, run_borrower_eda_pipeline\n", + "df = pd.read_csv(\"/Users/dv/Documents/cloned_repos/ml-model-git-lab/notebooks/data/loan.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "axMvdNZcgHZH" + }, + "outputs": [], + "source": [ + "eda = BorrowerProfileEDA(df, target_col=\"loan_status\")" + ] + }, + { + "cell_type": "code", + "source": [ + "report = run_borrower_eda_pipeline(eda)\n", + "\n", + "report[\"structure\"]\n", + "report[\"income\"]\n", + "report[\"freqs\"]\n", + "report[\"default_by_home_ownership\"]\n", + "report[\"default_by_purpose\"]" + ], + "metadata": { + "id": "dk0YDhTa6TPn" + }, + "execution_count": null, + "outputs": [] } - ] + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file From eb7e80253dcb41027f7715fde652abe765b0e571 Mon Sep 17 00:00:00 2001 From: D V Date: Fri, 28 Nov 2025 12:30:24 +0100 Subject: [PATCH 11/12] final fixes --- notebooks/eda_borrower_demo.ipynb | 783 ++++++++++++++++++++++++------ 1 file changed, 638 insertions(+), 145 deletions(-) diff --git a/notebooks/eda_borrower_demo.ipynb b/notebooks/eda_borrower_demo.ipynb index 5106165..7be0c57 100644 --- a/notebooks/eda_borrower_demo.ipynb +++ b/notebooks/eda_borrower_demo.ipynb @@ -1,158 +1,651 @@ { - "cells": [ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "knS_VAzEkWuW" + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "1. Load the dataset:\n", + "\n", + " import pandas as pd\n", + " from src.eda_borrower import BorrowerProfileEDA, run_borrower_eda_pipeline\n", + "\n", + " df = pd.read_csv(\"data/loan_sample.csv\") # or the correct path in your repo\n", + "\n", + "\n", + "2. Instantiate the EDA class:\n", + "\n", + " eda = BorrowerProfileEDA(df, target_col=\"loan_status\")\n", + "\n", + "\n", + "3. Run the pipeline and inspect the results:\n", + "\n", + " report = run_borrower_eda_pipeline(eda)\n", + "\n", + "\n", + "4. Display at least:\n", + "\n", + " report[\"structure\"] # table of borrower column structure\n", + " report[\"income\"] # income stats\n", + " report[\"freqs\"] # categorical frequencies\n", + " report[\"default_by_home_ownership\"] # default rate by home_ownership\n", + " report[\"default_by_purpose\"] # default rate by purpose\n", + "\n", + "\n", + "\n", + "You can add markdown cells explaining what each result means in plain language (e.g., class imbalance, missingness, etc.).\n", + "\n", + "Acceptance Criteria ✅\n", + "---------------------\n", + "\n", + "- `BorrowerProfileEDA`:\n", + "\n", + " - Initializes correctly with a DataFrame.\n", + " - `structure_summary()` returns a DataFrame with the requested columns/metrics.\n", + " - `income_summary()` returns a DataFrame with stats for `annual_inc` and `annual_inc_joint`.\n", + " - `categorical_freqs()` returns a dict of Series with top categories.\n", + " - `default_rate_by_category(col)` returns a Series of default rates per category.\n", + "- Functional pipeline:\n", + "\n", + " - `borrower_eda_steps(eda)` returns a dict of callables.\n", + " - `run_borrower_eda_pipeline(eda)` iterates over that dict, calls each function, and returns a dict of results.\n", + "- Notebook:\n", + "\n", + " - Runs top-to-bottom without errors.\n", + " - Shows the structure summary, income summary, categorical frequencies, and default-rate-by-category analysis.\n", + " - Contains only EDA (no model training).\n", + " \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "if not os.path.exists(\"../src/eda_borrower.py\"):\n", + " raise FileNotFoundError(\"⚠️ eda_borrower.py missing in src/. Check your repo structure!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "\n", + "repo_root = r\"/Users/dv/Documents/cloned_repos/ml-model-git-lab\"\n", + "sys.path.insert(0, repo_root)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "knS_VAzEkWuW" - }, - "outputs": [], - "source": [ - "\"\"\"\n", - "1. Load the dataset:\n", - "\n", - " import pandas as pd\n", - " from src.eda_borrower import BorrowerProfileEDA, run_borrower_eda_pipeline\n", - "\n", - " df = pd.read_csv(\"data/loan_sample.csv\") # or the correct path in your repo\n", - "\n", - "\n", - "2. Instantiate the EDA class:\n", - "\n", - " eda = BorrowerProfileEDA(df, target_col=\"loan_status\")\n", - "\n", - "\n", - "3. Run the pipeline and inspect the results:\n", - "\n", - " report = run_borrower_eda_pipeline(eda)\n", - "\n", - "\n", - "4. Display at least:\n", - "\n", - " report[\"structure\"] # table of borrower column structure\n", - " report[\"income\"] # income stats\n", - " report[\"freqs\"] # categorical frequencies\n", - " report[\"default_by_home_ownership\"] # default rate by home_ownership\n", - " report[\"default_by_purpose\"] # default rate by purpose\n", - "\n", - "\n", - "\n", - "You can add markdown cells explaining what each result means in plain language (e.g., class imbalance, missingness, etc.).\n", - "\n", - "Acceptance Criteria ✅\n", - "---------------------\n", - "\n", - "- `BorrowerProfileEDA`:\n", - "\n", - " - Initializes correctly with a DataFrame.\n", - " - `structure_summary()` returns a DataFrame with the requested columns/metrics.\n", - " - `income_summary()` returns a DataFrame with stats for `annual_inc` and `annual_inc_joint`.\n", - " - `categorical_freqs()` returns a dict of Series with top categories.\n", - " - `default_rate_by_category(col)` returns a Series of default rates per category.\n", - "- Functional pipeline:\n", - "\n", - " - `borrower_eda_steps(eda)` returns a dict of callables.\n", - " - `run_borrower_eda_pipeline(eda)` iterates over that dict, calls each function, and returns a dict of results.\n", - "- Notebook:\n", - "\n", - " - Runs top-to-bottom without errors.\n", - " - Shows the structure summary, income summary, categorical frequencies, and default-rate-by-category analysis.\n", - " - Contains only EDA (no model training).\n", - " \"\"\"" + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import importlib\n", + "import src.eda_borrower\n", + "importlib.reload(src.eda_borrower)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from src.eda_borrower import BorrowerProfileEDA, run_borrower_eda_pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/dv/Documents/cloned_repos/ml-model-git-lab/src/eda_borrower.py\n" + ] + } + ], + "source": [ + "print(src.eda_borrower.__file__)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 349 }, + "id": "EFc8nSn-gHZH", + "outputId": "94730639-f261-4b07-ada3-bf996dcbb600" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EFc8nSn-gHZH", - "outputId": "94730639-f261-4b07-ada3-bf996dcbb600", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 349 - } - }, - "outputs": [ - { - "output_type": "error", - "ename": "ModuleNotFoundError", - "evalue": "No module named 'src'", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/tmp/ipython-input-4123335800.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meda_borrower\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mBorrowerProfileEDA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_borrower_eda_pipeline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"loan.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src'", - "", - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" - ], - "errorDetails": { - "actions": [ - { - "action": "open_url", - "actionText": "Open Examples", - "url": "/notebooks/snippets/importing_libraries.ipynb" - } - ] - } - } + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/gn/zvx5kxqj5ng9ng0cg5f15yk80000gn/T/ipykernel_62771/4134569851.py:3: DtypeWarning: Columns (19,55) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(\"/Users/dv/Documents/cloned_repos/ml-model-git-lab/notebooks/data/loan.csv\")\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from src.eda_borrower import BorrowerProfileEDA, run_borrower_eda_pipeline\n", + "df = pd.read_csv(\"/Users/dv/Documents/cloned_repos/ml-model-git-lab/notebooks/data/loan.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "default_map = {\n", + " \"Fully Paid\": 0,\n", + " \"Current\": 0,\n", + " \"In Grace Period\": 0,\n", + " \"Issued\": 0,\n", + " \"Does not meet the credit policy. Status:Fully Paid\": 0,\n", + "\n", + " \"Charged Off\": 1,\n", + " \"Default\": 1,\n", + " \"Late (31-120 days)\": 1,\n", + " \"Late (16-30 days)\": 1,\n", + " \"Does not meet the credit policy. Status:Charged Off\": 1\n", + "}\n", + "\n", + "df[\"loan_status_binary\"] = df[\"loan_status\"].map(default_map)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "axMvdNZcgHZH" + }, + "outputs": [], + "source": [ + "eda = BorrowerProfileEDA(df, target_col=\"loan_status_binary\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "dk0YDhTa6TPn" + }, + "outputs": [], + "source": [ + "report = run_borrower_eda_pipeline(eda)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
columndtypen_missingmissing_pctn_unique
0idint6400.000000887379
1member_idint6400.000000887379
2emp_titleobject514625.799326299271
3emp_lengthobject448255.05139311
4home_ownershipobject00.0000006
5annual_incfloat6440.00045149384
6annual_inc_jointfloat6488686899.942415308
7verification_statusobject00.0000003
8verification_status_jointobject88686899.9424153
9zip_codeobject00.000000935
10addr_stateobject00.00000051
11purposeobject00.00000014
12titleobject1530.01724263143
13descobject76135385.797951124468
14issue_dobject00.000000103
15pymnt_planobject00.0000002
16policy_codefloat6400.0000001
17urlobject00.000000887379
\n", + "
" ], - "source": [ - "import pandas as pd\n", - "from src.eda_borrower import BorrowerProfileEDA, run_borrower_eda_pipeline\n", - "df = pd.read_csv(\"/Users/dv/Documents/cloned_repos/ml-model-git-lab/notebooks/data/loan.csv\")" + "text/plain": [ + " column dtype n_missing missing_pct n_unique\n", + "0 id int64 0 0.000000 887379\n", + "1 member_id int64 0 0.000000 887379\n", + "2 emp_title object 51462 5.799326 299271\n", + "3 emp_length object 44825 5.051393 11\n", + "4 home_ownership object 0 0.000000 6\n", + "5 annual_inc float64 4 0.000451 49384\n", + "6 annual_inc_joint float64 886868 99.942415 308\n", + "7 verification_status object 0 0.000000 3\n", + "8 verification_status_joint object 886868 99.942415 3\n", + "9 zip_code object 0 0.000000 935\n", + "10 addr_state object 0 0.000000 51\n", + "11 purpose object 0 0.000000 14\n", + "12 title object 153 0.017242 63143\n", + "13 desc object 761353 85.797951 124468\n", + "14 issue_d object 0 0.000000 103\n", + "15 pymnt_plan object 0 0.000000 2\n", + "16 policy_code float64 0 0.000000 1\n", + "17 url object 0 0.000000 887379" ] - }, + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "report[\"structure\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "axMvdNZcgHZH" - }, - "outputs": [], - "source": [ - "eda = BorrowerProfileEDA(df, target_col=\"loan_status\")" + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
annual_inc887375.075027.58776164698.3001420.045000.065000.090000.09500000.0
annual_inc_joint511.0109981.01158552730.37984717950.076032.5101771.0132800.0500000.0
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% \\\n", + "annual_inc 887375.0 75027.587761 64698.300142 0.0 45000.0 \n", + "annual_inc_joint 511.0 109981.011585 52730.379847 17950.0 76032.5 \n", + "\n", + " 50% 75% max \n", + "annual_inc 65000.0 90000.0 9500000.0 \n", + "annual_inc_joint 101771.0 132800.0 500000.0 " ] - }, + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "report[\"income\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": [ - "report = run_borrower_eda_pipeline(eda)\n", - "\n", - "report[\"structure\"]\n", - "report[\"income\"]\n", - "report[\"freqs\"]\n", - "report[\"default_by_home_ownership\"]\n", - "report[\"default_by_purpose\"]" - ], - "metadata": { - "id": "dk0YDhTa6TPn" - }, - "execution_count": null, - "outputs": [] + "data": { + "text/plain": [ + "{'home_ownership': home_ownership\n", + " MORTGAGE 443557\n", + " RENT 356117\n", + " OWN 87470\n", + " OTHER 182\n", + " NONE 50\n", + " ANY 3\n", + " Name: count, dtype: int64,\n", + " 'addr_state': addr_state\n", + " CA 129517\n", + " NY 74086\n", + " TX 71138\n", + " FL 60935\n", + " IL 35476\n", + " NJ 33256\n", + " PA 31393\n", + " OH 29631\n", + " GA 29085\n", + " VA 26255\n", + " Name: count, dtype: int64,\n", + " 'purpose': purpose\n", + " debt_consolidation 524215\n", + " credit_card 206182\n", + " home_improvement 51829\n", + " other 42894\n", + " major_purchase 17277\n", + " small_business 10377\n", + " car 8863\n", + " medical 8540\n", + " moving 5414\n", + " vacation 4736\n", + " Name: count, dtype: int64}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" + ], + "source": [ + "report[\"freqs\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "home_ownership\n", + "ANY 0.000000\n", + "MORTGAGE 0.060520\n", + "NONE 0.160000\n", + "OTHER 0.208791\n", + "OWN 0.064662\n", + "RENT 0.080395\n", + "Name: loan_status_binary, dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "report[\"default_by_home_ownership\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "purpose\n", + "car 0.062733\n", + "credit_card 0.051435\n", + "debt_consolidation 0.071745\n", + "educational 0.208038\n", + "home_improvement 0.061471\n", + "house 0.102509\n", + "major_purchase 0.067662\n", + "medical 0.087588\n", + "moving 0.104174\n", + "other 0.089826\n", + "renewable_energy 0.111304\n", + "small_business 0.164017\n", + "vacation 0.077069\n", + "wedding 0.121858\n", + "Name: loan_status_binary, dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "report[\"default_by_purpose\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 5b35bee27849b9f5d16f354014cd18aec092f31c Mon Sep 17 00:00:00 2001 From: D V Date: Fri, 28 Nov 2025 13:47:40 +0100 Subject: [PATCH 12/12] add gitignore --- .DS_Store | Bin 8196 -> 8196 bytes .gitignore | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/.DS_Store | Bin 6148 -> 6148 bytes 3 files changed, 58 insertions(+) create mode 100644 .gitignore diff --git a/.DS_Store b/.DS_Store index 5b0ff84908dab68db8bcb2259114f817889004a5..854aed8ccfad65f1101329d6c488a13de677d169 100644 GIT binary patch delta 20 ccmZp1XmQwZO=$9e0oKXiM4UFu3P0xo0A6?qeE0iX0@MSZk++ZY;0AMi3DW#ZJ_OLr^!aQL(b|2N?VbwstA+FWK82 zHydrNWWU~fGdufckN(j=l`W@`tjbBVDx9SEd+{t=IOh*>JGclDV2lwaP|zF-JJvLs zh_H|1NoTW;HGnq0vBw%Q(FG(1WX`c+9I`6$MU|LKqR-qYm}-D+hAB*++$C8Cnew(e zU`cAg=QYoWk_K83YH(LX!Yuy5jMo~B$81!Rr70lU)_Vipr-5Sj(yP_t!)paKv`~L| gPKEU+(p=un^VgW%?Xz3@^aQ|}ZI(IHVd-lB1Kq$yo&W#< delta 76 zcmZoMXfc=|#>B`mu~2NHo+2aD#DLwC4MbQb^Rv9%EW#nova#V0<7Rdaeh#3T&4L`? ZnJ4p$SPC!z0V4wg6O?Az93irX836KI5Uv0K