"""Logistic regression on the UCLA graduate-admissions dataset.

The script downloads the dataset, one-hot encodes the categorical ``rank``
column, fits a statsmodels ``Logit`` model of ``admit`` on ``gre``, ``gpa``
and the rank dummies, scores the fit on the same data it was trained on
(confusion matrix and accuracy at a 0.5 threshold), and draws two scatter
plots.

Blocks marked "Recorded output" are transcriptions of output captured from an
earlier run.  The capture was partially garbled, so treat those values as
indicative and re-run the script to confirm them.
"""

# Step 1: Import required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.metrics import accuracy_score, confusion_matrix

# Step 2: Import Dataset from Web Storage
url = "https://stats.idre.ucla.edu/stat/data/binary.csv"
data = pd.read_csv(url)

print("First 5 Records of Dataset")
print(data.head())
# Recorded output (reconstructed from a partially garbled capture):
#    admit  gre   gpa  rank
# 0      0  380  3.61     3
# 1      1  660  3.67     3
# 2      1  800  4.00     1
# 3      1  640  3.19     4
# 4      0  520  2.93     4

# Step 3: Dataset Information
print("\nDataset Information")
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() emitted a stray "None" line after the report.
data.info()

print("\nStatistical Summary")
print(data.describe())

print("\nMissing Values")
print(data.isnull().sum())
# Recorded output (only fragments were legible in the capture):
#   RangeIndex: 400 entries; 4 columns; dtypes: float64(1), int64(3);
#   memory usage: 12.6 KB
# The describe() table and the per-column missing-value counts were not
# legible enough to transcribe.

# Step 4: Convert Rank to Dummy Variables
# drop_first=True drops the first rank level so that it acts as the reference
# category and the dummies are not collinear with the intercept.
rank_dummies = pd.get_dummies(data['rank'], prefix='rank', drop_first=True)
data = pd.concat([data, rank_dummies], axis=1)
data = data.drop('rank', axis=1)

print("\nDataset after creating dummy variables")
print(data.head())
# Recorded output (reconstructed from a partially garbled capture):
#    admit  gre   gpa  rank_2  rank_3  rank_4
# 0      0  380  3.61   False    True   False
# 1      1  660  3.67   False    True   False
# 2      1  800  4.00   False   False   False
# 3      1  640  3.19   False   False    True
# 4      0  520  2.93   False   False    True

# Step 5: Define Dependent and Independent Variables
X = data[['gre', 'gpa', 'rank_2', 'rank_3', 'rank_4']]
X = sm.add_constant(X)  # Add constant (intercept column named "const")
y = data['admit']

# Convert boolean columns to integers (0 and 1) for statsmodels
X[['rank_2', 'rank_3', 'rank_4']] = X[['rank_2', 'rank_3', 'rank_4']].astype(int)

# Step 6: Build Logistic Regression Model
logit_model = sm.Logit(y, X)
result = logit_model.fit()

print("\nLogistic Regression Summary")
print(result.summary())
# Recorded output:
#   Optimization terminated successfully.
#            Current function value: 0.573147
#            Iterations 6
#
#                          Logit Regression Results
#   Dep. Variable:   admit            No. Observations:   400
#   Model:           Logit            Df Residuals:       394
#   Method:          MLE              Df Model:           5
#   Date:            Wed, 11 Mar 2026 Pseudo R-squ.:      0.08292
#   Time:            13:55:19         Log-Likelihood:     -229.26
#   converged:       True             LL-Null:            -249.99
#   Covariance Type: nonrobust        LLR p-value:        7.578e-08
#
#              coef   std err        z    P>|z|    [0.025    0.975]
#   const   -3.9900     1.140   -3.500    0.000    -6.224    -1.756
#   gre      0.0023     0.001    2.070    0.038     0.000     0.004
#   gpa      0.8040     0.332    2.423    0.015     0.154     1.454
#   rank_2  -0.6754     0.316   -2.134    0.033    -1.296    -0.055
#   rank_3  -1.3402     0.345   -3.881    0.000    -2.017    -0.663
#   rank_4  -1.5515     0.418   -3.713    0.000    -2.370    -0.733

# Step 7: Predict Admission Probability
# Predictions are made on the training rows themselves (no hold-out split).
predictions = result.predict(X)
data['predicted_probability'] = predictions

print("\nPredicted Probabilities")
print(data[['admit', 'predicted_probability']].head())
# Recorded output (the "admit" column was not legible in the capture):
#   predicted_probability for rows 0-4:
#   0.172627, 0.292175, 0.738408, 0.178385, 0.118354

# Step 8: Convert Probability to Binary Class
# A probability of 0.5 or higher is classified as admitted (1).
data['predicted_admit'] = (data['predicted_probability'] >= 0.5).astype(int)

print("\nPredicted Admission")
print(data[['admit', 'predicted_admit']].head())

# Step 9: Evaluate Model Performance
cm = confusion_matrix(y, data['predicted_admit'])
accuracy = accuracy_score(y, data['predicted_admit'])

print("\nConfusion Matrix")
print(cm)
print("\nModel Accuracy:", accuracy)
# Recorded output:
#   Confusion Matrix
#   [[254  19]
#    [ 97  30]]
#   Model Accuracy: 0.71

# Step 10: Visualization
plt.figure()
sns.scatterplot(x='gre', y='admit', data=data)
plt.title("GRE Score vs Admission")
plt.show()

plt.figure()
sns.scatterplot(x='gpa', y='admit', data=data)
plt.title("GPA vs Admission")
plt.show()