# Import libraries and load data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Read the loan data into a DataFrame, then print a preview of the first
# five rows followed by the count of missing values in each column.
df = pd.read_csv('loan_prediction.csv')
print(df.head(5))
print(df.isna().sum())
# Impute missing values: LoanAmount gets the column mean, Credit_History gets
# the most frequent value.
#
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on df[col]: the in-place form acts on a temporary
# Series, which pandas warns about and which leaves df unchanged when
# Copy-on-Write is active. In that case the dropna() below would silently
# discard those rows.
df['LoanAmount'] = df['LoanAmount'].fillna(df['LoanAmount'].mean())
df['Credit_History'] = df['Credit_History'].fillna(df['Credit_History'].mode()[0])

# Drop every row that still has a missing value in any other column.
df.dropna(inplace=True)
# Histogram (20 bins) of LoanAmount with a kernel-density overlay.
plt.figure(figsize=(8, 6))
sns.histplot(df['LoanAmount'], bins=20, kde=True)
plt.title('Loan Amount Distribution')
plt.show()
# Bar chart of how many rows fall into each Education category.
plt.figure(figsize=(6, 4))
sns.countplot(x='Education', data=df)
plt.title('Education Level')
plt.show()
# Histogram (20 bins) of ApplicantIncome with a kernel-density overlay.
plt.figure(figsize=(8, 6))
sns.histplot(df['ApplicantIncome'], bins=20, kde=True)
plt.title('Applicant Income Distribution')
plt.show()
# Encode the categorical feature columns as integer codes. Each column gets
# its own LabelEncoder, kept in `encoders`, so the code -> label mapping of
# every column can be recovered with encoders[col].inverse_transform(...).
categorical_cols = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area']
encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le
# After the loop `le` refers to the Property_Area encoder.

# Features are every column except the target; the target is mapped to
# 1 for 'Y' and 0 for 'N'.
X = df.drop(columns=['Loan_Status'])
y = df['Loan_Status'].map({'Y': 1, 'N': 0})

# NOTE(review): the CSV schema is not visible from this script. If the file
# has text columns other than the five encoded above (an ID column, for
# example), they would reach the model as strings and make fit() raise.
# Handle them here: a text column whose values are all distinct is treated
# as a row identifier and dropped; any other is label-encoded like the rest.
# Confirm against the actual dataset.
non_numeric_cols = X.select_dtypes(exclude=['number', 'bool']).columns
for col in non_numeric_cols:
    if X[col].nunique() == len(X):
        X = X.drop(columns=[col])
    else:
        encoders[col] = LabelEncoder()
        X[col] = encoders[col].fit_transform(X[col])
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a logistic-regression classifier on the training split. max_iter is
# raised above the default of 100 because the features are not scaled here,
# and the solver often stops before converging within 100 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_pred = model.predict(X_test)
# Score the predictions on the held-out split: overall accuracy, the
# confusion matrix, and the per-class precision/recall/F1 report.
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

print("Classification Report:")
print(classification_report(y_test, y_pred))